# libraries that need to be installed and loaded
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.3
## ✓ tibble  3.0.0     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(mice)
## 
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(ggplot2)
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(lattice)
library(caret)
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(monomvn)
## Loading required package: pls
## 
## Attaching package: 'pls'
## The following object is masked from 'package:caret':
## 
##     R2
## The following object is masked from 'package:stats':
## 
##     loadings
## Loading required package: lars
## Loaded lars 1.2
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(ineq)
library(car)
## Loading required package: carData
## Registered S3 methods overwritten by 'car':
##   method                          from
##   influence.merMod                lme4
##   cooks.distance.influence.merMod lme4
##   dfbeta.influence.merMod         lme4
##   dfbetas.influence.merMod        lme4
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
library(knitr)
library(reshape)
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:Matrix':
## 
##     expand
## The following objects are masked from 'package:reshape2':
## 
##     colsplit, melt, recast
## The following object is masked from 'package:dplyr':
## 
##     rename
## The following objects are masked from 'package:tidyr':
## 
##     expand, smiths

Data Preparation

# read in data
require(haven)
## Loading required package: haven
sashhinc=read_sas("hhinc_10.sas7bdat")

# let's make this more interpretable by letting 1989 be year 0
# also let's convert income to thousands of yuan
sashhinc$cyear=sashhinc$WAVE-1989
sashhinc$thousands=sashhinc$hhincpc_cpi/1000

# replace NAs w/ null
sashhinc[is.na(sashhinc)] = 0
sashhinc_trimmed = sashhinc
# shift variables to positive in the case we want to use a log transform 
sashhinc$thousands_shifted=sashhinc$thousands - (min(sashhinc$thousands)-0.01)

# change to categorical variables
sashhinc$urban <- as.factor(sashhinc$urban)
sashhinc$hhid <- as.factor(sashhinc$hhid)
sashhinc$t1 <- as.factor(sashhinc$t1)

## .1% removed- put in sashhinc_trimmed df
lower = quantile(sashhinc$thousands, 0.001)
upper = quantile(sashhinc$thousands, 0.999)
sashhinc_trimmed = subset(sashhinc, lower < thousands & thousands < upper)
sashhinc_trimmed$thousands_shifted=sashhinc_trimmed$thousands - (min(sashhinc_trimmed$thousands)-0.01)
# take a peek at dataset
colnames(sashhinc)
##  [1] "WAVE"              "hhid"              "HHBUS"            
##  [4] "HHBUSimp"          "HHFARM"            "HHFARMimp"        
##  [7] "HHFISH"            "HHFISHimp"         "HHGARDimp"        
## [10] "hhgard"            "HHLVST"            "HHLVSTimp"        
## [13] "HHOTHR"            "HHOTHRimp"         "HHSUB"            
## [16] "HHSUBimp"          "HHRETIRE"          "HHRETIREimp"      
## [19] "hhNRwage"          "HHNRWAGEimp"       "HHINC"            
## [22] "hhincgross"        "hhexpense"         "HHINCimp"         
## [25] "commid"            "t1"                "urban"            
## [28] "T6_A"              "T6_B"              "hhsize"           
## [31] "hhinc_pc"          "CPI1988"           "CPI2015"          
## [34] "index_new"         "index_old"         "hhinc_cpi"        
## [37] "hhincpc_cpi"       "hhincgross_cpi"    "hhexpense_cpi"    
## [40] "cyear"             "thousands"         "thousands_shifted"
summary(sashhinc)
##       WAVE             hhid           HHBUS            HHBUSimp      
##  Min.   :1989   321102003:   10   Min.   :-564000   Min.   :0.00000  
##  1st Qu.:1997   321102006:   10   1st Qu.:      0   1st Qu.:0.00000  
##  Median :2004   321102007:   10   Median :      0   Median :0.00000  
##  Mean   :2003   321102009:   10   Mean   :   2227   Mean   :0.02936  
##  3rd Qu.:2011   321102010:   10   3rd Qu.:      0   3rd Qu.:0.00000  
##  Max.   :2015   321102012:   10   Max.   : 876000   Max.   :1.00000  
##                 (Other)  :43611                                      
##      HHFARM          HHFARMimp          HHFISH            HHFISHimp
##  Min.   :-147522   Min.   :0.0000   Min.   :-32500.00   Min.   :0  
##  1st Qu.:      0   1st Qu.:0.0000   1st Qu.:     0.00   1st Qu.:0  
##  Median :      0   Median :0.0000   Median :     0.00   Median :0  
##  Mean   :   1710   Mean   :0.1131   Mean   :    51.85   Mean   :0  
##  3rd Qu.:   1500   3rd Qu.:0.0000   3rd Qu.:     0.00   3rd Qu.:0  
##  Max.   :1099564   Max.   :1.0000   Max.   : 99000.00   Max.   :0  
##                                                                    
##    HHGARDimp           hhgard           HHLVST            HHLVSTimp      
##  Min.   :0.00000   Min.   :-76400   Min.   :-100199.0   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:     0   1st Qu.:      0.0   1st Qu.:0.00000  
##  Median :0.00000   Median :     0   Median :      0.0   Median :0.00000  
##  Mean   :0.05615   Mean   :  2054   Mean   :    259.6   Mean   :0.05461  
##  3rd Qu.:0.00000   3rd Qu.:   880   3rd Qu.:      0.0   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :846000   Max.   : 138957.7   Max.   :1.00000  
##                                                                          
##      HHOTHR          HHOTHRimp           HHSUB             HHSUBimp      
##  Min.   :      0   Min.   :0.00000   Min.   :     0.0   Min.   :0.00000  
##  1st Qu.:      0   1st Qu.:0.00000   1st Qu.:     0.0   1st Qu.:0.00000  
##  Median :      0   Median :0.00000   Median :     0.0   Median :0.00000  
##  Mean   :   2050   Mean   :0.02521   Mean   :   381.5   Mean   :0.06487  
##  3rd Qu.:   1000   3rd Qu.:0.00000   3rd Qu.:   117.5   3rd Qu.:0.00000  
##  Max.   :1019999   Max.   :1.00000   Max.   :216000.0   Max.   :1.00000  
##                                                                          
##     HHRETIRE       HHRETIREimp          hhNRwage        HHNRWAGEimp     
##  Min.   :     0   Min.   :0.000000   Min.   :      0   Min.   :0.00000  
##  1st Qu.:     0   1st Qu.:0.000000   1st Qu.:      0   1st Qu.:0.00000  
##  Median :     0   Median :0.000000   Median :    500   Median :0.00000  
##  Mean   :  4163   Mean   :0.004809   Mean   :  12537   Mean   :0.04206  
##  3rd Qu.:     0   3rd Qu.:0.000000   3rd Qu.:  10000   3rd Qu.:0.00000  
##  Max.   :180000   Max.   :1.000000   Max.   :4800000   Max.   :1.00000  
##                                                                         
##      HHINC           hhincgross        hhexpense          HHINCimp     
##  Min.   :-564000   Min.   :      0   Min.   :      0   Min.   :0.0000  
##  1st Qu.:   4251   1st Qu.:   5110   1st Qu.:      0   1st Qu.:0.0000  
##  Median :  11160   Median :  12800   Median :    300   Median :0.0000  
##  Mean   :  25434   Mean   :  28052   Mean   :   2597   Mean   :0.3009  
##  3rd Qu.:  28844   3rd Qu.:  31750   3rd Qu.:   1669   3rd Qu.:1.0000  
##  Max.   :4800000   Max.   :4800000   Max.   :1116000   Max.   :1.0000  
##                                                                        
##      commid             t1        urban          T6_A             T6_B       
##  Min.   :111101   52     : 4938   0:28483   Min.   : 0.000   Min.   :0.0000  
##  1st Qu.:322101   45     : 4924   1:15188   1st Qu.: 2.000   1st Qu.:0.0000  
##  Median :412301   42     : 4733             Median : 3.000   Median :0.0000  
##  Mean   :379669   41     : 4719             Mean   : 3.249   Mean   :0.3117  
##  3rd Qu.:432404   43     : 4711             3rd Qu.: 4.000   3rd Qu.:0.0000  
##  Max.   :552304   32     : 4701             Max.   :14.000   Max.   :9.0000  
##                   (Other):14945                                              
##      hhsize          hhinc_pc          CPI1988         CPI2015    
##  Min.   : 0.000   Min.   :-188000   Min.   :0.930   Min.   :0.27  
##  1st Qu.: 2.000   1st Qu.:   1208   1st Qu.:1.810   1st Qu.:0.53  
##  Median : 3.000   Median :   3325   Median :2.370   Median :0.69  
##  Mean   : 3.561   Mean   :   8494   Mean   :2.321   Mean   :0.68  
##  3rd Qu.: 4.000   3rd Qu.:   9467   3rd Qu.:2.860   3rd Qu.:0.84  
##  Max.   :15.000   Max.   :1200000   Max.   :4.530   Max.   :1.33  
##                                                                   
##    index_new      index_old       hhinc_cpi        hhincpc_cpi     
##  Min.   :0.27   Min.   :0.930   Min.   :-679518   Min.   :-226506  
##  1st Qu.:0.53   1st Qu.:1.810   1st Qu.:   8833   1st Qu.:   2502  
##  Median :0.69   Median :2.370   Median :  18132   Median :   5344  
##  Mean   :0.68   Mean   :2.321   Mean   :  31353   Mean   :  10178  
##  3rd Qu.:0.84   3rd Qu.:2.860   3rd Qu.:  37375   3rd Qu.:  11959  
##  Max.   :1.33   Max.   :4.530   Max.   :4528302   Max.   :1132076  
##                                                                    
##  hhincgross_cpi    hhexpense_cpi           cyear         thousands       
##  Min.   :      0   Min.   :      0.0   Min.   : 0.00   Min.   :-226.506  
##  1st Qu.:  10753   1st Qu.:      0.0   1st Qu.: 8.00   1st Qu.:   2.502  
##  Median :  20703   Median :    571.4   Median :15.00   Median :   5.344  
##  Mean   :  35207   Mean   :   3827.3   Mean   :13.83   Mean   :  10.178  
##  3rd Qu.:  41357   3rd Qu.:   3135.3   3rd Qu.:22.00   3rd Qu.:  11.959  
##  Max.   :4528302   Max.   :1344578.3   Max.   :26.00   Max.   :1132.075  
##                                                                          
##  thousands_shifted
##  Min.   :   0.01  
##  1st Qu.: 229.02  
##  Median : 231.86  
##  Mean   : 236.69  
##  3rd Qu.: 238.47  
##  Max.   :1358.59  
## 
# select variables we need for df
df = dplyr::select(sashhinc,
            hhid,
            WAVE,
            hhinc_cpi, 
            hhincpc_cpi, 
            HHBUS, 
            HHBUSimp, 
            HHFARM, 
            HHFARMimp, 
            HHFISH, 
            HHFISHimp, 
            hhgard, 
            HHGARDimp,
            HHLVST,
            HHLVSTimp,
            hhNRwage,
            HHNRWAGEimp,
            HHOTHR,
            HHOTHRimp,
            HHRETIRE,
            HHRETIREimp,
            HHSUB,
            HHSUBimp,
            t1,
            urban,
            thousands,
            cyear,
            thousands_shifted)

names(df)<- toupper(names(df))

#add primary income source variable
df <- df %>% 
        mutate(Primary = colnames(df[, c("HHBUS", "HHFARM", "HHFISH", "HHGARD", "HHLVST", "HHOTHR", "HHRETIRE", "HHSUB")])[apply(df[, c("HHBUS", "HHFARM", "HHFISH", "HHGARD", "HHLVST", "HHOTHR", "HHRETIRE", "HHSUB")], 1, which.max)])


# select variables we need for df w/o outliers
df_trimmed = dplyr::select(sashhinc_trimmed,
            hhid,
            WAVE,
            hhinc_cpi, 
            hhincpc_cpi, 
            HHBUS, 
            HHBUSimp, 
            HHFARM, 
            HHFARMimp, 
            HHFISH, 
            HHFISHimp, 
            hhgard, 
            HHGARDimp,
            HHLVST,
            HHLVSTimp,
            hhNRwage,
            HHNRWAGEimp,
            HHOTHR,
            HHOTHRimp,
            HHRETIRE,
            HHRETIREimp,
            HHSUB,
            HHSUBimp,
            t1,
            urban,
            thousands,
            cyear,
            thousands_shifted)

#add primary income source variable
df_trimmed <- df_trimmed %>% 
        mutate(Primary = colnames(df[, c("HHBUS", "HHFARM", "HHFISH", "HHGARD", "HHLVST", "HHOTHR", "HHRETIRE", "HHSUB")])[apply(df_trimmed[, c("HHBUS", "HHFARM", "HHFISH", "hhgard", "HHLVST", "HHOTHR", "HHRETIRE", "HHSUB")], 1, which.max)])

#add variable indicating whether household receives income from specific sources
df <- df %>% 
  mutate(business_ind = factor(as.numeric(HHBUS != 0)),
         farm_ind = factor(as.numeric(HHFARM != 0)),
         fish_ind = factor(as.numeric(HHFISH != 0)),
         gard_ind = factor(as.numeric(HHGARD != 0)),
         lvst_ind = factor(as.numeric(HHLVST != 0)),
         othr_ind = factor(as.numeric(HHOTHR != 0)),
         retire_ind = factor(as.numeric(HHRETIRE != 0)),
         sub_ind = factor(as.numeric(HHSUB != 0)))

names(df)<- toupper(names(df))
df$T1 = as.factor(df$T1)

names(df_trimmed)<- toupper(names(df_trimmed))
df_trimmed$T1 = as.factor(df_trimmed$T1)

head(df)
## # A tibble: 6 x 36
##   HHID   WAVE HHINC_CPI HHINCPC_CPI HHBUS HHBUSIMP HHFARM HHFARMIMP HHFISH
##   <fct> <dbl>     <dbl>       <dbl> <dbl>    <dbl>  <dbl>     <dbl>  <dbl>
## 1 2111…  1989    14175.       4725.     0        0      0         0      0
## 2 2111…  1989    13086.       4362.     0        0      0         0      0
## 3 2111…  1989    15047.       5016.     0        0      0         0      0
## 4 2111…  1989    17020.       5673.     0        0      0         0      0
## 5 2111…  1989    28431.       9477.     0        0      0         0      0
## 6 2111…  1989    11576.       5788.     0        0      0         0      0
## # … with 27 more variables: HHFISHIMP <dbl>, HHGARD <dbl>, HHGARDIMP <dbl>,
## #   HHLVST <dbl>, HHLVSTIMP <dbl>, HHNRWAGE <dbl>, HHNRWAGEIMP <dbl>,
## #   HHOTHR <dbl>, HHOTHRIMP <dbl>, HHRETIRE <dbl>, HHRETIREIMP <dbl>,
## #   HHSUB <dbl>, HHSUBIMP <dbl>, T1 <fct>, URBAN <fct>, THOUSANDS <dbl>,
## #   CYEAR <dbl>, THOUSANDS_SHIFTED <dbl>, PRIMARY <chr>, BUSINESS_IND <fct>,
## #   FARM_IND <fct>, FISH_IND <fct>, GARD_IND <fct>, LVST_IND <fct>,
## #   OTHR_IND <fct>, RETIRE_IND <fct>, SUB_IND <fct>
head(df_trimmed)
## # A tibble: 6 x 28
##   HHID   WAVE HHINC_CPI HHINCPC_CPI HHBUS HHBUSIMP HHFARM HHFARMIMP HHFISH
##   <fct> <dbl>     <dbl>       <dbl> <dbl>    <dbl>  <dbl>     <dbl>  <dbl>
## 1 2111…  1989    14175.       4725.     0        0      0         0      0
## 2 2111…  1989    13086.       4362.     0        0      0         0      0
## 3 2111…  1989    15047.       5016.     0        0      0         0      0
## 4 2111…  1989    17020.       5673.     0        0      0         0      0
## 5 2111…  1989    28431.       9477.     0        0      0         0      0
## 6 2111…  1989    11576.       5788.     0        0      0         0      0
## # … with 19 more variables: HHFISHIMP <dbl>, HHGARD <dbl>, HHGARDIMP <dbl>,
## #   HHLVST <dbl>, HHLVSTIMP <dbl>, HHNRWAGE <dbl>, HHNRWAGEIMP <dbl>,
## #   HHOTHR <dbl>, HHOTHRIMP <dbl>, HHRETIRE <dbl>, HHRETIREIMP <dbl>,
## #   HHSUB <dbl>, HHSUBIMP <dbl>, T1 <fct>, URBAN <fct>, THOUSANDS <dbl>,
## #   CYEAR <dbl>, THOUSANDS_SHIFTED <dbl>, PRIMARY <chr>

Preliminary Checks

# fit model with all interactions
m1 = lmer(THOUSANDS ~ CYEAR + URBAN + T1 + PRIMARY + 
          CYEAR*URBAN + PRIMARY*CYEAR + URBAN*PRIMARY
          + CYEAR*T1 + URBAN*T1 + T1*PRIMARY
        + (1 | HHID), data=df)
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
summary(m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: THOUSANDS ~ CYEAR + URBAN + T1 + PRIMARY + CYEAR * URBAN + PRIMARY *  
##     CYEAR + URBAN * PRIMARY + CYEAR * T1 + URBAN * T1 + T1 *  
##     PRIMARY + (1 | HHID)
##    Data: df
## 
## REML criterion at convergence: 367440
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -14.025  -0.284  -0.050   0.163  46.491 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  HHID     (Intercept) 138.9    11.79   
##  Residual             201.0    14.18   
## Number of obs: 43671, groups:  HHID, 9628
## 
## Fixed effects:
##                         Estimate Std. Error t value
## (Intercept)            -63.47811    6.67646  -9.508
## CYEAR                    3.11303    0.25273  12.318
## URBAN1                  12.85672    2.26241   5.683
## T121                    62.98502    6.71494   9.380
## T123                    52.80192    6.78430   7.783
## T131                    29.81922    9.49085   3.142
## T132                    64.66848    6.71860   9.625
## T137                    64.71300    6.71282   9.640
## T141                    63.39543    6.71487   9.441
## T142                    63.08973    6.71538   9.395
## T143                    66.44409    6.70987   9.902
## T145                    64.97931    6.70591   9.690
## T152                    62.86877    6.70721   9.373
## T155                    57.78970    9.16511   6.305
## PRIMARYHHFARM           13.23116    4.91987   2.689
## PRIMARYHHFISH           -6.92581   19.11678  -0.362
## PRIMARYHHGARD           11.63014    3.89550   2.986
## PRIMARYHHLVST           -0.38807    8.84008  -0.044
## PRIMARYHHOTHR           15.16196    2.18153   6.950
## PRIMARYHHRETIRE         -0.73363    2.05914  -0.356
## PRIMARYHHSUB             6.77936    2.12599   3.189
## CYEAR:URBAN1             0.11153    0.02951   3.780
## CYEAR:PRIMARYHHFARM     -0.39521    0.03386 -11.671
## CYEAR:PRIMARYHHFISH     -0.07912    0.18440  -0.429
## CYEAR:PRIMARYHHGARD     -0.15564    0.03380  -4.605
## CYEAR:PRIMARYHHLVST     -0.13042    0.07271  -1.794
## CYEAR:PRIMARYHHOTHR     -0.13329    0.03705  -3.597
## CYEAR:PRIMARYHHRETIRE    0.14383    0.04066   3.537
## CYEAR:PRIMARYHHSUB      -0.18086    0.04160  -4.347
## URBAN1:PRIMARYHHFARM    -0.56676    1.44833  -0.391
## URBAN1:PRIMARYHHFISH     4.84726    6.73914   0.719
## URBAN1:PRIMARYHHGARD     0.26312    1.03494   0.254
## URBAN1:PRIMARYHHLVST     0.82906    2.03694   0.407
## URBAN1:PRIMARYHHOTHR     0.26005    0.57997   0.448
## URBAN1:PRIMARYHHRETIRE  -0.03134    0.72007  -0.044
## URBAN1:PRIMARYHHSUB      1.22021    0.74191   1.645
## CYEAR:T121              -2.22155    0.25361  -8.760
## CYEAR:T123              -1.88498    0.25607  -7.361
## CYEAR:T131              -0.51588    0.35655  -1.447
## CYEAR:T132              -2.14269    0.25346  -8.454
## CYEAR:T137              -2.39658    0.25314  -9.468
## CYEAR:T141              -2.57461    0.25309 -10.173
## CYEAR:T142              -2.28326    0.25307  -9.022
## CYEAR:T143              -2.52265    0.25321  -9.963
## CYEAR:T145              -2.64231    0.25314 -10.438
## CYEAR:T152              -2.47050    0.25320  -9.757
## CYEAR:T155              -2.34270    0.35351  -6.627
## URBAN1:T121            -13.71257    2.37909  -5.764
## URBAN1:T123             -4.01332    2.51266  -1.597
## URBAN1:T131            -14.29782    2.86328  -4.994
## URBAN1:T132            -13.57151    2.38214  -5.697
## URBAN1:T137            -13.49382    2.35916  -5.720
## URBAN1:T141            -11.25385    2.40654  -4.676
## URBAN1:T142            -13.71377    2.42098  -5.665
## URBAN1:T143            -13.76509    2.37244  -5.802
## URBAN1:T145            -12.02212    2.38655  -5.037
## URBAN1:T152             -8.59692    2.39926  -3.583
## URBAN1:T155            -10.46102    2.61657  -3.998
## T121:PRIMARYHHFARM     -10.63811    4.95069  -2.149
## T123:PRIMARYHHFARM      -7.97679    4.97398  -1.604
## T131:PRIMARYHHFARM     -18.39630   13.01617  -1.413
## T132:PRIMARYHHFARM     -12.93698    4.94431  -2.617
## T137:PRIMARYHHFARM     -11.05728    4.94173  -2.238
## T141:PRIMARYHHFARM     -11.02634    4.92713  -2.238
## T142:PRIMARYHHFARM      -9.56899    4.94251  -1.936
## T143:PRIMARYHHFARM     -13.82735    4.96648  -2.784
## T145:PRIMARYHHFARM     -11.28105    4.93556  -2.286
## T152:PRIMARYHHFARM     -10.91695    4.93148  -2.214
## T155:PRIMARYHHFARM      -4.84221    5.78368  -0.837
## T121:PRIMARYHHFISH       9.40252   20.58883   0.457
## T132:PRIMARYHHFISH       6.60504   19.24440   0.343
## T137:PRIMARYHHFISH       7.22641   20.28429   0.356
## T141:PRIMARYHHFISH      10.84328   20.88284   0.519
## T142:PRIMARYHHFISH       6.49379   18.78857   0.346
## T143:PRIMARYHHFISH       9.55755   18.99479   0.503
## T145:PRIMARYHHFISH       6.96299   19.16938   0.363
## T152:PRIMARYHHFISH       6.27891   21.49696   0.292
## T121:PRIMARYHHGARD     -10.13772    3.93747  -2.575
## T123:PRIMARYHHGARD      -9.46574    4.00134  -2.366
## T131:PRIMARYHHGARD      -6.33916    5.46589  -1.160
## T132:PRIMARYHHGARD     -10.95868    3.92998  -2.788
## T137:PRIMARYHHGARD      -9.74915    3.96028  -2.462
## T141:PRIMARYHHGARD     -10.59900    3.95453  -2.680
## T142:PRIMARYHHGARD      -9.95248    3.89718  -2.554
## T143:PRIMARYHHGARD     -12.23537    3.89952  -3.138
## T145:PRIMARYHHGARD     -11.25875    3.89856  -2.888
## T152:PRIMARYHHGARD     -10.48484    3.89636  -2.691
## T155:PRIMARYHHGARD     -11.79882    4.19530  -2.812
## T121:PRIMARYHHLVST       3.02210    8.94752   0.338
## T123:PRIMARYHHLVST       9.92423    9.02273   1.100
## T131:PRIMARYHHLVST     -10.78969   12.29382  -0.878
## T132:PRIMARYHHLVST       1.29142    8.93082   0.145
## T137:PRIMARYHHLVST       4.90421    9.04233   0.542
## T141:PRIMARYHHLVST       2.52882    8.88471   0.285
## T142:PRIMARYHHLVST       2.58812    8.94088   0.289
## T143:PRIMARYHHLVST      -0.94807    8.80562  -0.108
## T145:PRIMARYHHLVST       0.44477    8.79007   0.051
## T152:PRIMARYHHLVST       1.27716    8.84177   0.144
## T155:PRIMARYHHLVST      -2.58677   10.11686  -0.256
## T121:PRIMARYHHOTHR     -15.45221    2.22385  -6.948
## T123:PRIMARYHHOTHR     -14.11167    2.28652  -6.172
## T131:PRIMARYHHOTHR      -9.19932    3.10304  -2.965
## T132:PRIMARYHHOTHR     -16.08096    2.18392  -7.363
## T137:PRIMARYHHOTHR     -14.91221    2.15861  -6.908
## T141:PRIMARYHHOTHR     -14.01096    2.14728  -6.525
## T142:PRIMARYHHOTHR     -16.74783    2.16510  -7.735
## T143:PRIMARYHHOTHR     -15.85189    2.12287  -7.467
## T145:PRIMARYHHOTHR     -14.63859    2.13549  -6.855
## T152:PRIMARYHHOTHR     -16.96346    2.12991  -7.964
## T155:PRIMARYHHOTHR     -12.20857    2.75253  -4.435
## T121:PRIMARYHHRETIRE     1.53155    1.97532   0.775
## T123:PRIMARYHHRETIRE    -3.55621    2.21783  -1.603
## T131:PRIMARYHHRETIRE    -6.11997    2.62481  -2.332
## T132:PRIMARYHHRETIRE    -2.39910    2.00487  -1.197
## T137:PRIMARYHHRETIRE    -0.21861    1.96185  -0.111
## T141:PRIMARYHHRETIRE     0.69591    2.04958   0.340
## T142:PRIMARYHHRETIRE    -3.57599    1.98570  -1.801
## T143:PRIMARYHHRETIRE    -0.68156    2.02056  -0.337
## T145:PRIMARYHHRETIRE    -2.62042    1.97628  -1.326
## T152:PRIMARYHHRETIRE    -0.51261    2.01861  -0.254
## T155:PRIMARYHHRETIRE     3.12800    2.54880   1.227
## T121:PRIMARYHHSUB       -5.54266    2.15008  -2.578
## T123:PRIMARYHHSUB       -8.47326    2.22836  -3.802
## T131:PRIMARYHHSUB       -3.23920    3.39946  -0.953
## T132:PRIMARYHHSUB       -6.01058    2.16528  -2.776
## T137:PRIMARYHHSUB       -5.92944    2.09717  -2.827
## T141:PRIMARYHHSUB       -6.66027    2.21639  -3.005
## T142:PRIMARYHHSUB       -4.81160    2.14400  -2.244
## T143:PRIMARYHHSUB       -7.80352    2.13199  -3.660
## T145:PRIMARYHHSUB       -6.81338    2.15087  -3.168
## T152:PRIMARYHHSUB       -8.31807    2.16058  -3.850
## T155:PRIMARYHHSUB       -4.11441    3.35876  -1.225
## 
## Correlation matrix not shown by default, as p = 132 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
anova(m1)
## Analysis of Variance Table
##               npar  Sum Sq Mean Sq   F value
## CYEAR            1 1034212 1034212 5145.1393
## URBAN            1   67475   67475  335.6855
## T1              11   90947    8268   41.1323
## PRIMARY          7   30805    4401   21.8931
## CYEAR:URBAN      1   28974   28974  144.1417
## CYEAR:PRIMARY    7   47690    6813   33.8935
## URBAN:PRIMARY    7    3030     433    2.1536
## CYEAR:T1        11   97825    8893   44.2429
## URBAN:T1        11   13514    1229    6.1120
## T1:PRIMARY      74   48842     660    3.2836
qqmath(resid(m1))

plot(m1,resid(.,scaled=TRUE)~fitted(.),abline=0, xlab = "Fitted Values", ylab = "Residuals", main="Initial Model Residuals")

# fit model w/ log transform w/ outliers 
m2 = lmer(log(THOUSANDS_SHIFTED) ~ CYEAR + URBAN + T1 + PRIMARY + 
          CYEAR*URBAN + PRIMARY*CYEAR + URBAN*PRIMARY
          + CYEAR*T1 + URBAN*T1 + T1*PRIMARY
        + (1 | HHID), data=df)
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
summary(m2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(THOUSANDS_SHIFTED) ~ CYEAR + URBAN + T1 + PRIMARY + CYEAR *  
##     URBAN + PRIMARY * CYEAR + URBAN * PRIMARY + CYEAR * T1 +  
##     URBAN * T1 + T1 * PRIMARY + (1 | HHID)
##    Data: df
## 
## REML criterion at convergence: -107455.5
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -138.909   -0.270   -0.057    0.136   23.258 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev.
##  HHID     (Intercept) 0.0005046 0.02246 
##  Residual             0.0044727 0.06688 
## Number of obs: 43671, groups:  HHID, 9628
## 
## Fixed effects:
##                          Estimate Std. Error t value
## (Intercept)             5.2550586  0.0293460 179.072
## CYEAR                   0.0092445  0.0011382   8.122
## URBAN1                  0.0425616  0.0083542   5.095
## T121                    0.1685402  0.0294411   5.725
## T123                    0.1259703  0.0297327   4.237
## T131                    0.0713243  0.0416467   1.713
## T132                    0.1743841  0.0294447   5.922
## T137                    0.1731355  0.0294304   5.883
## T141                    0.1685982  0.0294378   5.727
## T142                    0.1666256  0.0294324   5.661
## T143                    0.1906330  0.0294113   6.482
## T145                    0.1755471  0.0293896   5.973
## T152                    0.1654509  0.0294083   5.626
## T155                    0.1501979  0.0406290   3.697
## PRIMARYHHFARM           0.0413012  0.0206219   2.003
## PRIMARYHHFISH          -0.0314548  0.0740300  -0.425
## PRIMARYHHGARD           0.0329332  0.0156022   2.111
## PRIMARYHHLVST          -0.0124218  0.0368227  -0.337
## PRIMARYHHOTHR           0.0403738  0.0089815   4.495
## PRIMARYHHRETIRE        -0.0067145  0.0081535  -0.824
## PRIMARYHHSUB            0.0200021  0.0087296   2.291
## CYEAR:URBAN1            0.0003701  0.0001258   2.943
## CYEAR:PRIMARYHHFARM    -0.0018322  0.0001516 -12.082
## CYEAR:PRIMARYHHFISH    -0.0001523  0.0008295  -0.184
## CYEAR:PRIMARYHHGARD    -0.0005356  0.0001509  -3.548
## CYEAR:PRIMARYHHLVST    -0.0002802  0.0003285  -0.853
## CYEAR:PRIMARYHHOTHR    -0.0004947  0.0001648  -3.003
## CYEAR:PRIMARYHHRETIRE   0.0008158  0.0001754   4.651
## CYEAR:PRIMARYHHSUB     -0.0004172  0.0001813  -2.301
## URBAN1:PRIMARYHHFARM   -0.0067629  0.0063100  -1.072
## URBAN1:PRIMARYHHFISH    0.0192441  0.0302786   0.636
## URBAN1:PRIMARYHHGARD   -0.0011049  0.0044282  -0.250
## URBAN1:PRIMARYHHLVST    0.0027009  0.0091283   0.296
## URBAN1:PRIMARYHHOTHR    0.0019336  0.0025132   0.769
## URBAN1:PRIMARYHHRETIRE  0.0014745  0.0029157   0.506
## URBAN1:PRIMARYHHSUB     0.0046811  0.0031595   1.482
## CYEAR:T121             -0.0057717  0.0011414  -5.057
## CYEAR:T123             -0.0043374  0.0011527  -3.763
## CYEAR:T131             -0.0008384  0.0016031  -0.523
## CYEAR:T132             -0.0054180  0.0011409  -4.749
## CYEAR:T137             -0.0063704  0.0011397  -5.589
## CYEAR:T141             -0.0070055  0.0011397  -6.147
## CYEAR:T142             -0.0059912  0.0011400  -5.255
## CYEAR:T143             -0.0074354  0.0011403  -6.521
## CYEAR:T145             -0.0074547  0.0011398  -6.540
## CYEAR:T152             -0.0065829  0.0011401  -5.774
## CYEAR:T155             -0.0063080  0.0015979  -3.948
## URBAN1:T121            -0.0473368  0.0083780  -5.650
## URBAN1:T123            -0.0128469  0.0090252  -1.423
## URBAN1:T131            -0.0389441  0.0101721  -3.829
## URBAN1:T132            -0.0468964  0.0083897  -5.590
## URBAN1:T137            -0.0468388  0.0082888  -5.651
## URBAN1:T141            -0.0404064  0.0084899  -4.759
## URBAN1:T142            -0.0471390  0.0084249  -5.595
## URBAN1:T143            -0.0487929  0.0082902  -5.886
## URBAN1:T145            -0.0460297  0.0084264  -5.463
## URBAN1:T152            -0.0289733  0.0084662  -3.422
## URBAN1:T155            -0.0325769  0.0093852  -3.471
## T121:PRIMARYHHFARM     -0.0341761  0.0207243  -1.649
## T123:PRIMARYHHFARM     -0.0152346  0.0208186  -0.732
## T131:PRIMARYHHFARM     -0.0328921  0.0542515  -0.606
## T132:PRIMARYHHFARM     -0.0411895  0.0207143  -1.988
## T137:PRIMARYHHFARM     -0.0350057  0.0206902  -1.692
## T141:PRIMARYHHFARM     -0.0323764  0.0206371  -1.569
## T142:PRIMARYHHFARM     -0.0296306  0.0206965  -1.432
## T143:PRIMARYHHFARM     -0.0725540  0.0208218  -3.485
## T145:PRIMARYHHFARM     -0.0323826  0.0206788  -1.566
## T152:PRIMARYHHFARM     -0.0325656  0.0206666  -1.576
## T155:PRIMARYHHFARM     -0.0048046  0.0239085  -0.201
## T121:PRIMARYHHFISH      0.0288273  0.0820042   0.352
## T132:PRIMARYHHFISH      0.0357645  0.0747390   0.479
## T137:PRIMARYHHFISH      0.0281831  0.0798660   0.353
## T141:PRIMARYHHFISH      0.0442552  0.0827339   0.535
## T142:PRIMARYHHFISH      0.0291210  0.0722135   0.403
## T143:PRIMARYHHFISH      0.0326858  0.0731650   0.447
## T145:PRIMARYHHFISH      0.0351151  0.0743432   0.472
## T152:PRIMARYHHFISH      0.0303029  0.0869223   0.349
## T121:PRIMARYHHGARD     -0.0300917  0.0157505  -1.911
## T123:PRIMARYHHGARD     -0.0244437  0.0160580  -1.522
## T131:PRIMARYHHGARD     -0.0328620  0.0217274  -1.512
## T132:PRIMARYHHGARD     -0.0333316  0.0157589  -2.115
## T137:PRIMARYHHGARD     -0.0289047  0.0158825  -1.820
## T141:PRIMARYHHGARD     -0.0306987  0.0158854  -1.933
## T142:PRIMARYHHGARD     -0.0293501  0.0155936  -1.882
## T143:PRIMARYHHGARD     -0.0421373  0.0156022  -2.701
## T145:PRIMARYHHGARD     -0.0323557  0.0155970  -2.074
## T152:PRIMARYHHGARD     -0.0309585  0.0156039  -1.984
## T155:PRIMARYHHGARD     -0.0364870  0.0166526  -2.191
## T121:PRIMARYHHLVST      0.0172377  0.0373374   0.462
## T123:PRIMARYHHLVST      0.0448901  0.0376230   1.193
## T131:PRIMARYHHLVST     -0.0354855  0.0509504  -0.696
## T132:PRIMARYHHLVST      0.0102568  0.0372649   0.275
## T137:PRIMARYHHLVST      0.0244022  0.0378063   0.645
## T141:PRIMARYHHLVST      0.0178229  0.0370488   0.481
## T142:PRIMARYHHLVST      0.0215238  0.0373147   0.577
## T143:PRIMARYHHLVST     -0.0044711  0.0366397  -0.122
## T145:PRIMARYHHLVST      0.0114797  0.0365627   0.314
## T152:PRIMARYHHLVST      0.0133457  0.0368259   0.362
## T155:PRIMARYHHLVST     -0.0077738  0.0415474  -0.187
## T121:PRIMARYHHOTHR     -0.0449265  0.0091304  -4.921
## T123:PRIMARYHHOTHR     -0.0352599  0.0093919  -3.754
## T131:PRIMARYHHOTHR     -0.0250123  0.0125662  -1.990
## T132:PRIMARYHHOTHR     -0.0427954  0.0089565  -4.778
## T137:PRIMARYHHOTHR     -0.0401672  0.0088472  -4.540
## T141:PRIMARYHHOTHR     -0.0358910  0.0087883  -4.084
## T142:PRIMARYHHOTHR     -0.0478567  0.0089024  -5.376
## T143:PRIMARYHHOTHR     -0.0442005  0.0086756  -5.095
## T145:PRIMARYHHOTHR     -0.0368456  0.0087355  -4.218
## T152:PRIMARYHHOTHR     -0.0489143  0.0087165  -5.612
## T155:PRIMARYHHOTHR     -0.0273605  0.0111151  -2.462
## T121:PRIMARYHHRETIRE    0.0085013  0.0076208   1.116
## T123:PRIMARYHHRETIRE   -0.0096738  0.0087065  -1.111
## T131:PRIMARYHHRETIRE   -0.0226773  0.0101822  -2.227
## T132:PRIMARYHHRETIRE   -0.0059007  0.0077780  -0.759
## T137:PRIMARYHHRETIRE    0.0035008  0.0076273   0.459
## T141:PRIMARYHHRETIRE    0.0068253  0.0079813   0.855
## T142:PRIMARYHHRETIRE   -0.0113179  0.0077892  -1.453
## T143:PRIMARYHHRETIRE    0.0031822  0.0078853   0.404
## T145:PRIMARYHHRETIRE   -0.0047902  0.0077104  -0.621
## T152:PRIMARYHHRETIRE    0.0002537  0.0079037   0.032
## T155:PRIMARYHHRETIRE    0.0113551  0.0097214   1.168
## T121:PRIMARYHHSUB      -0.0112021  0.0087526  -1.280
## T123:PRIMARYHHSUB      -0.0229826  0.0091654  -2.508
## T131:PRIMARYHHSUB      -0.0059112  0.0137822  -0.429
## T132:PRIMARYHHSUB      -0.0148314  0.0088362  -1.678
## T137:PRIMARYHHSUB      -0.0152182  0.0085786  -1.774
## T141:PRIMARYHHSUB      -0.0150387  0.0089959  -1.672
## T142:PRIMARYHHSUB      -0.0097392  0.0088196  -1.104
## T143:PRIMARYHHSUB      -0.0238762  0.0087210  -2.738
## T145:PRIMARYHHSUB      -0.0182125  0.0088387  -2.061
## T152:PRIMARYHHSUB      -0.0216936  0.0088343  -2.456
## T155:PRIMARYHHSUB      -0.0130575  0.0136407  -0.957
## 
## Correlation matrix not shown by default, as p = 132 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
qqmath(resid(m2))

plot(m2,resid(.,scaled=TRUE)~fitted(.),abline=0, xlab = "Fitted Values", ylab = "Residuals", main="Logged Model Residuals")

# fit model w/ log transform w/o outliers 
m3 = lmer(log(THOUSANDS_SHIFTED) ~ CYEAR + URBAN + T1 + PRIMARY + 
          CYEAR*URBAN + PRIMARY*CYEAR + URBAN*PRIMARY
          + CYEAR*T1 + URBAN*T1 + T1*PRIMARY
        + (1 | HHID), data=df_trimmed)
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
summary(m3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(THOUSANDS_SHIFTED) ~ CYEAR + URBAN + T1 + PRIMARY + CYEAR *  
##     URBAN + PRIMARY * CYEAR + URBAN * PRIMARY + CYEAR * T1 +  
##     URBAN * T1 + T1 * PRIMARY + (1 | HHID)
##    Data: df_trimmed
## 
## REML criterion at convergence: 82721.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -9.5981 -0.5149  0.0153  0.5331  6.2093 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  HHID     (Intercept) 0.1116   0.3341  
##  Residual             0.3194   0.5651  
## Number of obs: 43583, groups:  HHID, 9623
## 
## Fixed effects:
##                          Estimate Std. Error t value
## (Intercept)             1.5618304  0.2581509   6.050
## CYEAR                   0.0430227  0.0098913   4.350
## URBAN1                  0.3160395  0.0798599   3.957
## T121                    0.0146482  0.2592921   0.056
## T123                   -0.7170148  0.2619674  -2.737
## T131                    0.2014195  0.3661220   0.550
## T132                    0.1329570  0.2593712   0.513
## T137                   -0.0269323  0.2592029  -0.104
## T141                   -0.1838794  0.2592722  -0.709
## T142                   -0.1753367  0.2592573  -0.676
## T143                    0.0678627  0.2590604   0.262
## T145                   -0.0133963  0.2588767  -0.052
## T152                   -0.1956473  0.2589818  -0.755
## T155                   -0.0024103  0.3552454  -0.007
## PRIMARYHHFARM          -0.1384962  0.1855914  -0.746
## PRIMARYHHFISH          -0.2240370  0.6854126  -0.327
## PRIMARYHHGARD          -0.1492003  0.1436812  -1.038
## PRIMARYHHLVST          -0.8132208  0.3327167  -2.444
## PRIMARYHHOTHR           0.1795430  0.0821877   2.185
## PRIMARYHHRETIRE        -0.1162181  0.0755399  -1.539
## PRIMARYHHSUB            0.0042437  0.0795875   0.053
## CYEAR:URBAN1            0.0035260  0.0011303   3.120
## CYEAR:PRIMARYHHFARM    -0.0063348  0.0013276  -4.772
## CYEAR:PRIMARYHHFISH     0.0033799  0.0072426   0.467
## CYEAR:PRIMARYHHGARD     0.0089151  0.0013218   6.745
## CYEAR:PRIMARYHHLVST     0.0095225  0.0028547   3.336
## CYEAR:PRIMARYHHOTHR    -0.0053007  0.0014460  -3.666
## CYEAR:PRIMARYHHRETIRE   0.0166843  0.0015656  10.657
## CYEAR:PRIMARYHHSUB      0.0004642  0.0016082   0.289
## URBAN1:PRIMARYHHFARM   -0.0394113  0.0567223  -0.695
## URBAN1:PRIMARYHHFISH    0.1679606  0.2641293   0.636
## URBAN1:PRIMARYHHGARD   -0.0259260  0.0397746  -0.652
## URBAN1:PRIMARYHHLVST    0.0652337  0.0798325   0.817
## URBAN1:PRIMARYHHOTHR   -0.0169596  0.0224079  -0.757
## URBAN1:PRIMARYHHRETIRE  0.0504328  0.0270507   1.864
## URBAN1:PRIMARYHHSUB     0.1086176  0.0284339   3.820
## CYEAR:T121              0.0069811  0.0099227   0.704
## CYEAR:T123              0.0262604  0.0100198   2.621
## CYEAR:T131              0.0095234  0.0139345   0.683
## CYEAR:T132              0.0079582  0.0099176   0.802
## CYEAR:T137             -0.0019718  0.0099060  -0.199
## CYEAR:T141             -0.0109794  0.0099049  -1.108
## CYEAR:T142              0.0027835  0.0099058   0.281
## CYEAR:T143             -0.0116312  0.0099099  -1.174
## CYEAR:T145             -0.0160639  0.0099062  -1.622
## CYEAR:T152             -0.0036789  0.0099088  -0.371
## CYEAR:T155             -0.0228148  0.0138438  -1.648
## URBAN1:T121            -0.3438524  0.0821327  -4.187
## URBAN1:T123             0.2240949  0.0876493   2.557
## URBAN1:T131            -0.2653309  0.0991733  -2.675
## URBAN1:T132            -0.3924686  0.0822641  -4.771
## URBAN1:T137            -0.3556785  0.0813637  -4.371
## URBAN1:T141            -0.1330007  0.0832333  -1.598
## URBAN1:T142            -0.2787304  0.0831610  -3.352
## URBAN1:T143            -0.2763176  0.0816202  -3.385
## URBAN1:T145            -0.3506235  0.0825019  -4.250
## URBAN1:T152            -0.0183336  0.0829218  -0.221
## URBAN1:T155            -0.1033889  0.0909590  -1.137
## T121:PRIMARYHHFARM     -0.1926021  0.1867068  -1.032
## T123:PRIMARYHHFARM      0.2249867  0.1876227   1.199
## T131:PRIMARYHHFARM     -0.1688836  0.4902134  -0.345
## T132:PRIMARYHHFARM     -0.1689360  0.1865117  -0.906
## T137:PRIMARYHHFARM     -0.0277265  0.1863729  -0.149
## T141:PRIMARYHHFARM     -0.0762178  0.1858284  -0.410
## T142:PRIMARYHHFARM     -0.0282780  0.1864352  -0.152
## T143:PRIMARYHHFARM     -0.1295494  0.1874575  -0.691
## T145:PRIMARYHHFARM     -0.1111607  0.1861745  -0.597
## T152:PRIMARYHHFARM     -0.1011704  0.1860285  -0.544
## T155:PRIMARYHHFARM      0.5160301  0.2172880   2.375
## T121:PRIMARYHHFISH      0.3568243  0.7492528   0.476
## T132:PRIMARYHHFISH      0.3194231  0.6909858   0.462
## T137:PRIMARYHHFISH      0.2834861  0.7345091   0.386
## T141:PRIMARYHHFISH      0.5274449  0.7599429   0.694
## T142:PRIMARYHHFISH      0.2415545  0.6709912   0.360
## T143:PRIMARYHHFISH      0.4538453  0.6794552   0.668
## T145:PRIMARYHHFISH      0.3367605  0.6878997   0.490
## T152:PRIMARYHHFISH      0.1305119  0.7885433   0.166
## T121:PRIMARYHHGARD     -0.1621139  0.1452369  -1.116
## T123:PRIMARYHHGARD      0.0771567  0.1479006   0.522
## T131:PRIMARYHHGARD     -0.2279779  0.2025880  -1.125
## T132:PRIMARYHHGARD     -0.0845802  0.1450825  -0.583
## T137:PRIMARYHHGARD      0.1198030  0.1462495   0.819
## T141:PRIMARYHHGARD      0.0125943  0.1460937   0.086
## T142:PRIMARYHHGARD      0.0062095  0.1437221   0.043
## T143:PRIMARYHHGARD     -0.0449206  0.1437907  -0.312
## T145:PRIMARYHHGARD     -0.0809027  0.1437526  -0.563
## T152:PRIMARYHHGARD     -0.0236003  0.1437133  -0.164
## T155:PRIMARYHHGARD      0.0757281  0.1540392   0.492
## T121:PRIMARYHHLVST      0.4768837  0.3370989   1.415
## T123:PRIMARYHHLVST      1.1860814  0.3400202   3.488
## T131:PRIMARYHHLVST      0.1853870  0.4615335   0.402
## T132:PRIMARYHHLVST      0.5719443  0.3365163   1.700
## T137:PRIMARYHHLVST      0.8743785  0.3409695   2.564
## T141:PRIMARYHHLVST      0.7971393  0.3345954   2.382
## T142:PRIMARYHHLVST      0.7258149  0.3368771   2.155
## T143:PRIMARYHHLVST      0.3801149  0.3312613   1.147
## T145:PRIMARYHHLVST      0.5781784  0.3306270   1.749
## T152:PRIMARYHHLVST      0.5408523  0.3327848   1.625
## T155:PRIMARYHHLVST      0.4866854  0.3779092   1.288
## T121:PRIMARYHHOTHR     -0.3280062  0.0837051  -3.919
## T123:PRIMARYHHOTHR     -0.1298177  0.0861693  -1.507
## T131:PRIMARYHHOTHR     -0.1703986  0.1156399  -1.474
## T132:PRIMARYHHOTHR     -0.1887592  0.0821778  -2.297
## T137:PRIMARYHHOTHR     -0.2041672  0.0812045  -2.514
## T141:PRIMARYHHOTHR     -0.2007368  0.0807209  -2.487
## T142:PRIMARYHHOTHR     -0.3418783  0.0815589  -4.192
## T143:PRIMARYHHOTHR     -0.2489674  0.0797648  -3.121
## T145:PRIMARYHHOTHR     -0.1821113  0.0802630  -2.269
## T152:PRIMARYHHOTHR     -0.3405880  0.0800679  -4.254
## T155:PRIMARYHHOTHR      0.0591478  0.1023612   0.578
## T121:PRIMARYHHRETIRE    0.0861324  0.0716166   1.203
## T123:PRIMARYHHRETIRE   -0.0861180  0.0812796  -1.060
## T131:PRIMARYHHRETIRE   -0.2966162  0.0952242  -3.115
## T132:PRIMARYHHRETIRE   -0.0235444  0.0729320  -0.323
## T137:PRIMARYHHRETIRE    0.1376026  0.0713851   1.928
## T141:PRIMARYHHRETIRE    0.1580941  0.0747122   2.116
## T142:PRIMARYHHRETIRE   -0.0721954  0.0725979  -0.994
## T143:PRIMARYHHRETIRE    0.1231290  0.0737136   1.670
## T145:PRIMARYHHRETIRE    0.0111629  0.0720137   0.155
## T152:PRIMARYHHRETIRE   -0.0228738  0.0737762  -0.310
## T155:PRIMARYHHRETIRE    0.3507000  0.0915713   3.830
## T121:PRIMARYHHSUB      -0.0354432  0.0802377  -0.442
## T123:PRIMARYHHSUB      -0.1092376  0.0836217  -1.306
## T131:PRIMARYHHSUB      -0.0610588  0.1266517  -0.482
## T132:PRIMARYHHSUB      -0.0372753  0.0809144  -0.461
## T137:PRIMARYHHSUB      -0.0350315  0.0784096  -0.447
## T141:PRIMARYHHSUB      -0.0803380  0.0826573  -0.972
## T142:PRIMARYHHSUB       0.0334957  0.0804020   0.417
## T143:PRIMARYHHSUB      -0.1176882  0.0797352  -1.476
## T145:PRIMARYHHSUB      -0.1458065  0.0805663  -1.810
## T152:PRIMARYHHSUB      -0.1596282  0.0808367  -1.975
## T155:PRIMARYHHSUB       0.0341838  0.1246815   0.274
## 
## Correlation matrix not shown by default, as p = 132 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
qqmath(resid(m3))

plot(m3,resid(.,scaled=TRUE)~fitted(.),abline=0, xlab = "Fitted Values", ylab = "Residuals", main="Residuals of log model without outliers")

Initial Model

colnames(df)
##  [1] "HHID"              "WAVE"              "HHINC_CPI"        
##  [4] "HHINCPC_CPI"       "HHBUS"             "HHBUSIMP"         
##  [7] "HHFARM"            "HHFARMIMP"         "HHFISH"           
## [10] "HHFISHIMP"         "HHGARD"            "HHGARDIMP"        
## [13] "HHLVST"            "HHLVSTIMP"         "HHNRWAGE"         
## [16] "HHNRWAGEIMP"       "HHOTHR"            "HHOTHRIMP"        
## [19] "HHRETIRE"          "HHRETIREIMP"       "HHSUB"            
## [22] "HHSUBIMP"          "T1"                "URBAN"            
## [25] "THOUSANDS"         "CYEAR"             "THOUSANDS_SHIFTED"
## [28] "PRIMARY"           "BUSINESS_IND"      "FARM_IND"         
## [31] "FISH_IND"          "GARD_IND"          "LVST_IND"         
## [34] "OTHR_IND"          "RETIRE_IND"        "SUB_IND"
# fit model with all interactions
m1 = lmer(THOUSANDS ~ CYEAR + URBAN + T1 + PRIMARY + 
          CYEAR*URBAN + PRIMARY*CYEAR + URBAN*PRIMARY
          + CYEAR*T1 + URBAN*T1 + T1*PRIMARY
        + (1 | HHID), data=df)
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
summary(m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: THOUSANDS ~ CYEAR + URBAN + T1 + PRIMARY + CYEAR * URBAN + PRIMARY *  
##     CYEAR + URBAN * PRIMARY + CYEAR * T1 + URBAN * T1 + T1 *  
##     PRIMARY + (1 | HHID)
##    Data: df
## 
## REML criterion at convergence: 367440
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -14.025  -0.284  -0.050   0.163  46.491 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  HHID     (Intercept) 138.9    11.79   
##  Residual             201.0    14.18   
## Number of obs: 43671, groups:  HHID, 9628
## 
## Fixed effects:
##                         Estimate Std. Error t value
## (Intercept)            -63.47811    6.67646  -9.508
## CYEAR                    3.11303    0.25273  12.318
## URBAN1                  12.85672    2.26241   5.683
## T121                    62.98502    6.71494   9.380
## T123                    52.80192    6.78430   7.783
## T131                    29.81922    9.49085   3.142
## T132                    64.66848    6.71860   9.625
## T137                    64.71300    6.71282   9.640
## T141                    63.39543    6.71487   9.441
## T142                    63.08973    6.71538   9.395
## T143                    66.44409    6.70987   9.902
## T145                    64.97931    6.70591   9.690
## T152                    62.86877    6.70721   9.373
## T155                    57.78970    9.16511   6.305
## PRIMARYHHFARM           13.23116    4.91987   2.689
## PRIMARYHHFISH           -6.92581   19.11678  -0.362
## PRIMARYHHGARD           11.63014    3.89550   2.986
## PRIMARYHHLVST           -0.38807    8.84008  -0.044
## PRIMARYHHOTHR           15.16196    2.18153   6.950
## PRIMARYHHRETIRE         -0.73363    2.05914  -0.356
## PRIMARYHHSUB             6.77936    2.12599   3.189
## CYEAR:URBAN1             0.11153    0.02951   3.780
## CYEAR:PRIMARYHHFARM     -0.39521    0.03386 -11.671
## CYEAR:PRIMARYHHFISH     -0.07912    0.18440  -0.429
## CYEAR:PRIMARYHHGARD     -0.15564    0.03380  -4.605
## CYEAR:PRIMARYHHLVST     -0.13042    0.07271  -1.794
## CYEAR:PRIMARYHHOTHR     -0.13329    0.03705  -3.597
## CYEAR:PRIMARYHHRETIRE    0.14383    0.04066   3.537
## CYEAR:PRIMARYHHSUB      -0.18086    0.04160  -4.347
## URBAN1:PRIMARYHHFARM    -0.56676    1.44833  -0.391
## URBAN1:PRIMARYHHFISH     4.84726    6.73914   0.719
## URBAN1:PRIMARYHHGARD     0.26312    1.03494   0.254
## URBAN1:PRIMARYHHLVST     0.82906    2.03694   0.407
## URBAN1:PRIMARYHHOTHR     0.26005    0.57997   0.448
## URBAN1:PRIMARYHHRETIRE  -0.03134    0.72007  -0.044
## URBAN1:PRIMARYHHSUB      1.22021    0.74191   1.645
## CYEAR:T121              -2.22155    0.25361  -8.760
## CYEAR:T123              -1.88498    0.25607  -7.361
## CYEAR:T131              -0.51588    0.35655  -1.447
## CYEAR:T132              -2.14269    0.25346  -8.454
## CYEAR:T137              -2.39658    0.25314  -9.468
## CYEAR:T141              -2.57461    0.25309 -10.173
## CYEAR:T142              -2.28326    0.25307  -9.022
## CYEAR:T143              -2.52265    0.25321  -9.963
## CYEAR:T145              -2.64231    0.25314 -10.438
## CYEAR:T152              -2.47050    0.25320  -9.757
## CYEAR:T155              -2.34270    0.35351  -6.627
## URBAN1:T121            -13.71257    2.37909  -5.764
## URBAN1:T123             -4.01332    2.51266  -1.597
## URBAN1:T131            -14.29782    2.86328  -4.994
## URBAN1:T132            -13.57151    2.38214  -5.697
## URBAN1:T137            -13.49382    2.35916  -5.720
## URBAN1:T141            -11.25385    2.40654  -4.676
## URBAN1:T142            -13.71377    2.42098  -5.665
## URBAN1:T143            -13.76509    2.37244  -5.802
## URBAN1:T145            -12.02212    2.38655  -5.037
## URBAN1:T152             -8.59692    2.39926  -3.583
## URBAN1:T155            -10.46102    2.61657  -3.998
## T121:PRIMARYHHFARM     -10.63811    4.95069  -2.149
## T123:PRIMARYHHFARM      -7.97679    4.97398  -1.604
## T131:PRIMARYHHFARM     -18.39630   13.01617  -1.413
## T132:PRIMARYHHFARM     -12.93698    4.94431  -2.617
## T137:PRIMARYHHFARM     -11.05728    4.94173  -2.238
## T141:PRIMARYHHFARM     -11.02634    4.92713  -2.238
## T142:PRIMARYHHFARM      -9.56899    4.94251  -1.936
## T143:PRIMARYHHFARM     -13.82735    4.96648  -2.784
## T145:PRIMARYHHFARM     -11.28105    4.93556  -2.286
## T152:PRIMARYHHFARM     -10.91695    4.93148  -2.214
## T155:PRIMARYHHFARM      -4.84221    5.78368  -0.837
## T121:PRIMARYHHFISH       9.40252   20.58883   0.457
## T132:PRIMARYHHFISH       6.60504   19.24440   0.343
## T137:PRIMARYHHFISH       7.22641   20.28429   0.356
## T141:PRIMARYHHFISH      10.84328   20.88284   0.519
## T142:PRIMARYHHFISH       6.49379   18.78857   0.346
## T143:PRIMARYHHFISH       9.55755   18.99479   0.503
## T145:PRIMARYHHFISH       6.96299   19.16938   0.363
## T152:PRIMARYHHFISH       6.27891   21.49696   0.292
## T121:PRIMARYHHGARD     -10.13772    3.93747  -2.575
## T123:PRIMARYHHGARD      -9.46574    4.00134  -2.366
## T131:PRIMARYHHGARD      -6.33916    5.46589  -1.160
## T132:PRIMARYHHGARD     -10.95868    3.92998  -2.788
## T137:PRIMARYHHGARD      -9.74915    3.96028  -2.462
## T141:PRIMARYHHGARD     -10.59900    3.95453  -2.680
## T142:PRIMARYHHGARD      -9.95248    3.89718  -2.554
## T143:PRIMARYHHGARD     -12.23537    3.89952  -3.138
## T145:PRIMARYHHGARD     -11.25875    3.89856  -2.888
## T152:PRIMARYHHGARD     -10.48484    3.89636  -2.691
## T155:PRIMARYHHGARD     -11.79882    4.19530  -2.812
## T121:PRIMARYHHLVST       3.02210    8.94752   0.338
## T123:PRIMARYHHLVST       9.92423    9.02273   1.100
## T131:PRIMARYHHLVST     -10.78969   12.29382  -0.878
## T132:PRIMARYHHLVST       1.29142    8.93082   0.145
## T137:PRIMARYHHLVST       4.90421    9.04233   0.542
## T141:PRIMARYHHLVST       2.52882    8.88471   0.285
## T142:PRIMARYHHLVST       2.58812    8.94088   0.289
## T143:PRIMARYHHLVST      -0.94807    8.80562  -0.108
## T145:PRIMARYHHLVST       0.44477    8.79007   0.051
## T152:PRIMARYHHLVST       1.27716    8.84177   0.144
## T155:PRIMARYHHLVST      -2.58677   10.11686  -0.256
## T121:PRIMARYHHOTHR     -15.45221    2.22385  -6.948
## T123:PRIMARYHHOTHR     -14.11167    2.28652  -6.172
## T131:PRIMARYHHOTHR      -9.19932    3.10304  -2.965
## T132:PRIMARYHHOTHR     -16.08096    2.18392  -7.363
## T137:PRIMARYHHOTHR     -14.91221    2.15861  -6.908
## T141:PRIMARYHHOTHR     -14.01096    2.14728  -6.525
## T142:PRIMARYHHOTHR     -16.74783    2.16510  -7.735
## T143:PRIMARYHHOTHR     -15.85189    2.12287  -7.467
## T145:PRIMARYHHOTHR     -14.63859    2.13549  -6.855
## T152:PRIMARYHHOTHR     -16.96346    2.12991  -7.964
## T155:PRIMARYHHOTHR     -12.20857    2.75253  -4.435
## T121:PRIMARYHHRETIRE     1.53155    1.97532   0.775
## T123:PRIMARYHHRETIRE    -3.55621    2.21783  -1.603
## T131:PRIMARYHHRETIRE    -6.11997    2.62481  -2.332
## T132:PRIMARYHHRETIRE    -2.39910    2.00487  -1.197
## T137:PRIMARYHHRETIRE    -0.21861    1.96185  -0.111
## T141:PRIMARYHHRETIRE     0.69591    2.04958   0.340
## T142:PRIMARYHHRETIRE    -3.57599    1.98570  -1.801
## T143:PRIMARYHHRETIRE    -0.68156    2.02056  -0.337
## T145:PRIMARYHHRETIRE    -2.62042    1.97628  -1.326
## T152:PRIMARYHHRETIRE    -0.51261    2.01861  -0.254
## T155:PRIMARYHHRETIRE     3.12800    2.54880   1.227
## T121:PRIMARYHHSUB       -5.54266    2.15008  -2.578
## T123:PRIMARYHHSUB       -8.47326    2.22836  -3.802
## T131:PRIMARYHHSUB       -3.23920    3.39946  -0.953
## T132:PRIMARYHHSUB       -6.01058    2.16528  -2.776
## T137:PRIMARYHHSUB       -5.92944    2.09717  -2.827
## T141:PRIMARYHHSUB       -6.66027    2.21639  -3.005
## T142:PRIMARYHHSUB       -4.81160    2.14400  -2.244
## T143:PRIMARYHHSUB       -7.80352    2.13199  -3.660
## T145:PRIMARYHHSUB       -6.81338    2.15087  -3.168
## T152:PRIMARYHHSUB       -8.31807    2.16058  -3.850
## T155:PRIMARYHHSUB       -4.11441    3.35876  -1.225
## 
## Correlation matrix not shown by default, as p = 132 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
anova(m1)
## Analysis of Variance Table
##               npar  Sum Sq Mean Sq   F value
## CYEAR            1 1034212 1034212 5145.1393
## URBAN            1   67475   67475  335.6855
## T1              11   90947    8268   41.1323
## PRIMARY          7   30805    4401   21.8931
## CYEAR:URBAN      1   28974   28974  144.1417
## CYEAR:PRIMARY    7   47690    6813   33.8935
## URBAN:PRIMARY    7    3030     433    2.1536
## CYEAR:T1        11   97825    8893   44.2429
## URBAN:T1        11   13514    1229    6.1120
## T1:PRIMARY      74   48842     660    3.2836
qqmath(resid(m1))

plot(m1,resid(.,scaled=TRUE)~fitted(.),abline=0, xlab = "Fitted Values", ylab = "Residuals", main="Initial Model Residuals")

We note that the residual plot looks bad and has a fanned shape from left to right. We try a log transform of the response variable. Note that log cannot take on negative values, so we shift all values in the dataset by the minimum value in the dataset and add 0.01. This is the denoted as the variable THOUSANDS_SHIFTED, which we computed above.

# fit model w/ log transform w/ outliers 
m2 = lmer(log(THOUSANDS_SHIFTED) ~ CYEAR + URBAN + T1 + PRIMARY + 
          CYEAR*URBAN + PRIMARY*CYEAR + URBAN*PRIMARY
          + CYEAR*T1 + URBAN*T1 + T1*PRIMARY
        + (1 | HHID), data=df)
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
summary(m2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(THOUSANDS_SHIFTED) ~ CYEAR + URBAN + T1 + PRIMARY + CYEAR *  
##     URBAN + PRIMARY * CYEAR + URBAN * PRIMARY + CYEAR * T1 +  
##     URBAN * T1 + T1 * PRIMARY + (1 | HHID)
##    Data: df
## 
## REML criterion at convergence: -107455.5
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -138.909   -0.270   -0.057    0.136   23.258 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev.
##  HHID     (Intercept) 0.0005046 0.02246 
##  Residual             0.0044727 0.06688 
## Number of obs: 43671, groups:  HHID, 9628
## 
## Fixed effects:
##                          Estimate Std. Error t value
## (Intercept)             5.2550586  0.0293460 179.072
## CYEAR                   0.0092445  0.0011382   8.122
## URBAN1                  0.0425616  0.0083542   5.095
## T121                    0.1685402  0.0294411   5.725
## T123                    0.1259703  0.0297327   4.237
## T131                    0.0713243  0.0416467   1.713
## T132                    0.1743841  0.0294447   5.922
## T137                    0.1731355  0.0294304   5.883
## T141                    0.1685982  0.0294378   5.727
## T142                    0.1666256  0.0294324   5.661
## T143                    0.1906330  0.0294113   6.482
## T145                    0.1755471  0.0293896   5.973
## T152                    0.1654509  0.0294083   5.626
## T155                    0.1501979  0.0406290   3.697
## PRIMARYHHFARM           0.0413012  0.0206219   2.003
## PRIMARYHHFISH          -0.0314548  0.0740300  -0.425
## PRIMARYHHGARD           0.0329332  0.0156022   2.111
## PRIMARYHHLVST          -0.0124218  0.0368227  -0.337
## PRIMARYHHOTHR           0.0403738  0.0089815   4.495
## PRIMARYHHRETIRE        -0.0067145  0.0081535  -0.824
## PRIMARYHHSUB            0.0200021  0.0087296   2.291
## CYEAR:URBAN1            0.0003701  0.0001258   2.943
## CYEAR:PRIMARYHHFARM    -0.0018322  0.0001516 -12.082
## CYEAR:PRIMARYHHFISH    -0.0001523  0.0008295  -0.184
## CYEAR:PRIMARYHHGARD    -0.0005356  0.0001509  -3.548
## CYEAR:PRIMARYHHLVST    -0.0002802  0.0003285  -0.853
## CYEAR:PRIMARYHHOTHR    -0.0004947  0.0001648  -3.003
## CYEAR:PRIMARYHHRETIRE   0.0008158  0.0001754   4.651
## CYEAR:PRIMARYHHSUB     -0.0004172  0.0001813  -2.301
## URBAN1:PRIMARYHHFARM   -0.0067629  0.0063100  -1.072
## URBAN1:PRIMARYHHFISH    0.0192441  0.0302786   0.636
## URBAN1:PRIMARYHHGARD   -0.0011049  0.0044282  -0.250
## URBAN1:PRIMARYHHLVST    0.0027009  0.0091283   0.296
## URBAN1:PRIMARYHHOTHR    0.0019336  0.0025132   0.769
## URBAN1:PRIMARYHHRETIRE  0.0014745  0.0029157   0.506
## URBAN1:PRIMARYHHSUB     0.0046811  0.0031595   1.482
## CYEAR:T121             -0.0057717  0.0011414  -5.057
## CYEAR:T123             -0.0043374  0.0011527  -3.763
## CYEAR:T131             -0.0008384  0.0016031  -0.523
## CYEAR:T132             -0.0054180  0.0011409  -4.749
## CYEAR:T137             -0.0063704  0.0011397  -5.589
## CYEAR:T141             -0.0070055  0.0011397  -6.147
## CYEAR:T142             -0.0059912  0.0011400  -5.255
## CYEAR:T143             -0.0074354  0.0011403  -6.521
## CYEAR:T145             -0.0074547  0.0011398  -6.540
## CYEAR:T152             -0.0065829  0.0011401  -5.774
## CYEAR:T155             -0.0063080  0.0015979  -3.948
## URBAN1:T121            -0.0473368  0.0083780  -5.650
## URBAN1:T123            -0.0128469  0.0090252  -1.423
## URBAN1:T131            -0.0389441  0.0101721  -3.829
## URBAN1:T132            -0.0468964  0.0083897  -5.590
## URBAN1:T137            -0.0468388  0.0082888  -5.651
## URBAN1:T141            -0.0404064  0.0084899  -4.759
## URBAN1:T142            -0.0471390  0.0084249  -5.595
## URBAN1:T143            -0.0487929  0.0082902  -5.886
## URBAN1:T145            -0.0460297  0.0084264  -5.463
## URBAN1:T152            -0.0289733  0.0084662  -3.422
## URBAN1:T155            -0.0325769  0.0093852  -3.471
## T121:PRIMARYHHFARM     -0.0341761  0.0207243  -1.649
## T123:PRIMARYHHFARM     -0.0152346  0.0208186  -0.732
## T131:PRIMARYHHFARM     -0.0328921  0.0542515  -0.606
## T132:PRIMARYHHFARM     -0.0411895  0.0207143  -1.988
## T137:PRIMARYHHFARM     -0.0350057  0.0206902  -1.692
## T141:PRIMARYHHFARM     -0.0323764  0.0206371  -1.569
## T142:PRIMARYHHFARM     -0.0296306  0.0206965  -1.432
## T143:PRIMARYHHFARM     -0.0725540  0.0208218  -3.485
## T145:PRIMARYHHFARM     -0.0323826  0.0206788  -1.566
## T152:PRIMARYHHFARM     -0.0325656  0.0206666  -1.576
## T155:PRIMARYHHFARM     -0.0048046  0.0239085  -0.201
## T121:PRIMARYHHFISH      0.0288273  0.0820042   0.352
## T132:PRIMARYHHFISH      0.0357645  0.0747390   0.479
## T137:PRIMARYHHFISH      0.0281831  0.0798660   0.353
## T141:PRIMARYHHFISH      0.0442552  0.0827339   0.535
## T142:PRIMARYHHFISH      0.0291210  0.0722135   0.403
## T143:PRIMARYHHFISH      0.0326858  0.0731650   0.447
## T145:PRIMARYHHFISH      0.0351151  0.0743432   0.472
## T152:PRIMARYHHFISH      0.0303029  0.0869223   0.349
## T121:PRIMARYHHGARD     -0.0300917  0.0157505  -1.911
## T123:PRIMARYHHGARD     -0.0244437  0.0160580  -1.522
## T131:PRIMARYHHGARD     -0.0328620  0.0217274  -1.512
## T132:PRIMARYHHGARD     -0.0333316  0.0157589  -2.115
## T137:PRIMARYHHGARD     -0.0289047  0.0158825  -1.820
## T141:PRIMARYHHGARD     -0.0306987  0.0158854  -1.933
## T142:PRIMARYHHGARD     -0.0293501  0.0155936  -1.882
## T143:PRIMARYHHGARD     -0.0421373  0.0156022  -2.701
## T145:PRIMARYHHGARD     -0.0323557  0.0155970  -2.074
## T152:PRIMARYHHGARD     -0.0309585  0.0156039  -1.984
## T155:PRIMARYHHGARD     -0.0364870  0.0166526  -2.191
## T121:PRIMARYHHLVST      0.0172377  0.0373374   0.462
## T123:PRIMARYHHLVST      0.0448901  0.0376230   1.193
## T131:PRIMARYHHLVST     -0.0354855  0.0509504  -0.696
## T132:PRIMARYHHLVST      0.0102568  0.0372649   0.275
## T137:PRIMARYHHLVST      0.0244022  0.0378063   0.645
## T141:PRIMARYHHLVST      0.0178229  0.0370488   0.481
## T142:PRIMARYHHLVST      0.0215238  0.0373147   0.577
## T143:PRIMARYHHLVST     -0.0044711  0.0366397  -0.122
## T145:PRIMARYHHLVST      0.0114797  0.0365627   0.314
## T152:PRIMARYHHLVST      0.0133457  0.0368259   0.362
## T155:PRIMARYHHLVST     -0.0077738  0.0415474  -0.187
## T121:PRIMARYHHOTHR     -0.0449265  0.0091304  -4.921
## T123:PRIMARYHHOTHR     -0.0352599  0.0093919  -3.754
## T131:PRIMARYHHOTHR     -0.0250123  0.0125662  -1.990
## T132:PRIMARYHHOTHR     -0.0427954  0.0089565  -4.778
## T137:PRIMARYHHOTHR     -0.0401672  0.0088472  -4.540
## T141:PRIMARYHHOTHR     -0.0358910  0.0087883  -4.084
## T142:PRIMARYHHOTHR     -0.0478567  0.0089024  -5.376
## T143:PRIMARYHHOTHR     -0.0442005  0.0086756  -5.095
## T145:PRIMARYHHOTHR     -0.0368456  0.0087355  -4.218
## T152:PRIMARYHHOTHR     -0.0489143  0.0087165  -5.612
## T155:PRIMARYHHOTHR     -0.0273605  0.0111151  -2.462
## T121:PRIMARYHHRETIRE    0.0085013  0.0076208   1.116
## T123:PRIMARYHHRETIRE   -0.0096738  0.0087065  -1.111
## T131:PRIMARYHHRETIRE   -0.0226773  0.0101822  -2.227
## T132:PRIMARYHHRETIRE   -0.0059007  0.0077780  -0.759
## T137:PRIMARYHHRETIRE    0.0035008  0.0076273   0.459
## T141:PRIMARYHHRETIRE    0.0068253  0.0079813   0.855
## T142:PRIMARYHHRETIRE   -0.0113179  0.0077892  -1.453
## T143:PRIMARYHHRETIRE    0.0031822  0.0078853   0.404
## T145:PRIMARYHHRETIRE   -0.0047902  0.0077104  -0.621
## T152:PRIMARYHHRETIRE    0.0002537  0.0079037   0.032
## T155:PRIMARYHHRETIRE    0.0113551  0.0097214   1.168
## T121:PRIMARYHHSUB      -0.0112021  0.0087526  -1.280
## T123:PRIMARYHHSUB      -0.0229826  0.0091654  -2.508
## T131:PRIMARYHHSUB      -0.0059112  0.0137822  -0.429
## T132:PRIMARYHHSUB      -0.0148314  0.0088362  -1.678
## T137:PRIMARYHHSUB      -0.0152182  0.0085786  -1.774
## T141:PRIMARYHHSUB      -0.0150387  0.0089959  -1.672
## T142:PRIMARYHHSUB      -0.0097392  0.0088196  -1.104
## T143:PRIMARYHHSUB      -0.0238762  0.0087210  -2.738
## T145:PRIMARYHHSUB      -0.0182125  0.0088387  -2.061
## T152:PRIMARYHHSUB      -0.0216936  0.0088343  -2.456
## T155:PRIMARYHHSUB      -0.0130575  0.0136407  -0.957
## 
## Correlation matrix not shown by default, as p = 132 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
qqmath(resid(m2))

plot(m2,resid(.,scaled=TRUE)~fitted(.),abline=0, xlab = "Fitted Values", ylab = "Residuals", main="Logged Model Residuals")

It looks like the log transform improved the residual plots a bit. However, is this because the plot is zoomed out so much, or is it actually making a difference? Let’s remove the outliers, which are causing the zoom out, and reevaluate the effect of the log transform.

# fit model w/ log transform w/o outliers 
m3 = lmer(log(THOUSANDS_SHIFTED) ~ CYEAR + URBAN + T1 + PRIMARY + 
          CYEAR*URBAN + PRIMARY*CYEAR + URBAN*PRIMARY
          + CYEAR*T1 + URBAN*T1 + T1*PRIMARY
        + (1 | HHID), data=df_trimmed)
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
summary(m3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(THOUSANDS_SHIFTED) ~ CYEAR + URBAN + T1 + PRIMARY + CYEAR *  
##     URBAN + PRIMARY * CYEAR + URBAN * PRIMARY + CYEAR * T1 +  
##     URBAN * T1 + T1 * PRIMARY + (1 | HHID)
##    Data: df_trimmed
## 
## REML criterion at convergence: 82721.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -9.5981 -0.5149  0.0153  0.5331  6.2093 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  HHID     (Intercept) 0.1116   0.3341  
##  Residual             0.3194   0.5651  
## Number of obs: 43583, groups:  HHID, 9623
## 
## Fixed effects:
##                          Estimate Std. Error t value
## (Intercept)             1.5618304  0.2581509   6.050
## CYEAR                   0.0430227  0.0098913   4.350
## URBAN1                  0.3160395  0.0798599   3.957
## T121                    0.0146482  0.2592921   0.056
## T123                   -0.7170148  0.2619674  -2.737
## T131                    0.2014195  0.3661220   0.550
## T132                    0.1329570  0.2593712   0.513
## T137                   -0.0269323  0.2592029  -0.104
## T141                   -0.1838794  0.2592722  -0.709
## T142                   -0.1753367  0.2592573  -0.676
## T143                    0.0678627  0.2590604   0.262
## T145                   -0.0133963  0.2588767  -0.052
## T152                   -0.1956473  0.2589818  -0.755
## T155                   -0.0024103  0.3552454  -0.007
## PRIMARYHHFARM          -0.1384962  0.1855914  -0.746
## PRIMARYHHFISH          -0.2240370  0.6854126  -0.327
## PRIMARYHHGARD          -0.1492003  0.1436812  -1.038
## PRIMARYHHLVST          -0.8132208  0.3327167  -2.444
## PRIMARYHHOTHR           0.1795430  0.0821877   2.185
## PRIMARYHHRETIRE        -0.1162181  0.0755399  -1.539
## PRIMARYHHSUB            0.0042437  0.0795875   0.053
## CYEAR:URBAN1            0.0035260  0.0011303   3.120
## CYEAR:PRIMARYHHFARM    -0.0063348  0.0013276  -4.772
## CYEAR:PRIMARYHHFISH     0.0033799  0.0072426   0.467
## CYEAR:PRIMARYHHGARD     0.0089151  0.0013218   6.745
## CYEAR:PRIMARYHHLVST     0.0095225  0.0028547   3.336
## CYEAR:PRIMARYHHOTHR    -0.0053007  0.0014460  -3.666
## CYEAR:PRIMARYHHRETIRE   0.0166843  0.0015656  10.657
## CYEAR:PRIMARYHHSUB      0.0004642  0.0016082   0.289
## URBAN1:PRIMARYHHFARM   -0.0394113  0.0567223  -0.695
## URBAN1:PRIMARYHHFISH    0.1679606  0.2641293   0.636
## URBAN1:PRIMARYHHGARD   -0.0259260  0.0397746  -0.652
## URBAN1:PRIMARYHHLVST    0.0652337  0.0798325   0.817
## URBAN1:PRIMARYHHOTHR   -0.0169596  0.0224079  -0.757
## URBAN1:PRIMARYHHRETIRE  0.0504328  0.0270507   1.864
## URBAN1:PRIMARYHHSUB     0.1086176  0.0284339   3.820
## CYEAR:T121              0.0069811  0.0099227   0.704
## CYEAR:T123              0.0262604  0.0100198   2.621
## CYEAR:T131              0.0095234  0.0139345   0.683
## CYEAR:T132              0.0079582  0.0099176   0.802
## CYEAR:T137             -0.0019718  0.0099060  -0.199
## CYEAR:T141             -0.0109794  0.0099049  -1.108
## CYEAR:T142              0.0027835  0.0099058   0.281
## CYEAR:T143             -0.0116312  0.0099099  -1.174
## CYEAR:T145             -0.0160639  0.0099062  -1.622
## CYEAR:T152             -0.0036789  0.0099088  -0.371
## CYEAR:T155             -0.0228148  0.0138438  -1.648
## URBAN1:T121            -0.3438524  0.0821327  -4.187
## URBAN1:T123             0.2240949  0.0876493   2.557
## URBAN1:T131            -0.2653309  0.0991733  -2.675
## URBAN1:T132            -0.3924686  0.0822641  -4.771
## URBAN1:T137            -0.3556785  0.0813637  -4.371
## URBAN1:T141            -0.1330007  0.0832333  -1.598
## URBAN1:T142            -0.2787304  0.0831610  -3.352
## URBAN1:T143            -0.2763176  0.0816202  -3.385
## URBAN1:T145            -0.3506235  0.0825019  -4.250
## URBAN1:T152            -0.0183336  0.0829218  -0.221
## URBAN1:T155            -0.1033889  0.0909590  -1.137
## T121:PRIMARYHHFARM     -0.1926021  0.1867068  -1.032
## T123:PRIMARYHHFARM      0.2249867  0.1876227   1.199
## T131:PRIMARYHHFARM     -0.1688836  0.4902134  -0.345
## T132:PRIMARYHHFARM     -0.1689360  0.1865117  -0.906
## T137:PRIMARYHHFARM     -0.0277265  0.1863729  -0.149
## T141:PRIMARYHHFARM     -0.0762178  0.1858284  -0.410
## T142:PRIMARYHHFARM     -0.0282780  0.1864352  -0.152
## T143:PRIMARYHHFARM     -0.1295494  0.1874575  -0.691
## T145:PRIMARYHHFARM     -0.1111607  0.1861745  -0.597
## T152:PRIMARYHHFARM     -0.1011704  0.1860285  -0.544
## T155:PRIMARYHHFARM      0.5160301  0.2172880   2.375
## T121:PRIMARYHHFISH      0.3568243  0.7492528   0.476
## T132:PRIMARYHHFISH      0.3194231  0.6909858   0.462
## T137:PRIMARYHHFISH      0.2834861  0.7345091   0.386
## T141:PRIMARYHHFISH      0.5274449  0.7599429   0.694
## T142:PRIMARYHHFISH      0.2415545  0.6709912   0.360
## T143:PRIMARYHHFISH      0.4538453  0.6794552   0.668
## T145:PRIMARYHHFISH      0.3367605  0.6878997   0.490
## T152:PRIMARYHHFISH      0.1305119  0.7885433   0.166
## T121:PRIMARYHHGARD     -0.1621139  0.1452369  -1.116
## T123:PRIMARYHHGARD      0.0771567  0.1479006   0.522
## T131:PRIMARYHHGARD     -0.2279779  0.2025880  -1.125
## T132:PRIMARYHHGARD     -0.0845802  0.1450825  -0.583
## T137:PRIMARYHHGARD      0.1198030  0.1462495   0.819
## T141:PRIMARYHHGARD      0.0125943  0.1460937   0.086
## T142:PRIMARYHHGARD      0.0062095  0.1437221   0.043
## T143:PRIMARYHHGARD     -0.0449206  0.1437907  -0.312
## T145:PRIMARYHHGARD     -0.0809027  0.1437526  -0.563
## T152:PRIMARYHHGARD     -0.0236003  0.1437133  -0.164
## T155:PRIMARYHHGARD      0.0757281  0.1540392   0.492
## T121:PRIMARYHHLVST      0.4768837  0.3370989   1.415
## T123:PRIMARYHHLVST      1.1860814  0.3400202   3.488
## T131:PRIMARYHHLVST      0.1853870  0.4615335   0.402
## T132:PRIMARYHHLVST      0.5719443  0.3365163   1.700
## T137:PRIMARYHHLVST      0.8743785  0.3409695   2.564
## T141:PRIMARYHHLVST      0.7971393  0.3345954   2.382
## T142:PRIMARYHHLVST      0.7258149  0.3368771   2.155
## T143:PRIMARYHHLVST      0.3801149  0.3312613   1.147
## T145:PRIMARYHHLVST      0.5781784  0.3306270   1.749
## T152:PRIMARYHHLVST      0.5408523  0.3327848   1.625
## T155:PRIMARYHHLVST      0.4866854  0.3779092   1.288
## T121:PRIMARYHHOTHR     -0.3280062  0.0837051  -3.919
## T123:PRIMARYHHOTHR     -0.1298177  0.0861693  -1.507
## T131:PRIMARYHHOTHR     -0.1703986  0.1156399  -1.474
## T132:PRIMARYHHOTHR     -0.1887592  0.0821778  -2.297
## T137:PRIMARYHHOTHR     -0.2041672  0.0812045  -2.514
## T141:PRIMARYHHOTHR     -0.2007368  0.0807209  -2.487
## T142:PRIMARYHHOTHR     -0.3418783  0.0815589  -4.192
## T143:PRIMARYHHOTHR     -0.2489674  0.0797648  -3.121
## T145:PRIMARYHHOTHR     -0.1821113  0.0802630  -2.269
## T152:PRIMARYHHOTHR     -0.3405880  0.0800679  -4.254
## T155:PRIMARYHHOTHR      0.0591478  0.1023612   0.578
## T121:PRIMARYHHRETIRE    0.0861324  0.0716166   1.203
## T123:PRIMARYHHRETIRE   -0.0861180  0.0812796  -1.060
## T131:PRIMARYHHRETIRE   -0.2966162  0.0952242  -3.115
## T132:PRIMARYHHRETIRE   -0.0235444  0.0729320  -0.323
## T137:PRIMARYHHRETIRE    0.1376026  0.0713851   1.928
## T141:PRIMARYHHRETIRE    0.1580941  0.0747122   2.116
## T142:PRIMARYHHRETIRE   -0.0721954  0.0725979  -0.994
## T143:PRIMARYHHRETIRE    0.1231290  0.0737136   1.670
## T145:PRIMARYHHRETIRE    0.0111629  0.0720137   0.155
## T152:PRIMARYHHRETIRE   -0.0228738  0.0737762  -0.310
## T155:PRIMARYHHRETIRE    0.3507000  0.0915713   3.830
## T121:PRIMARYHHSUB      -0.0354432  0.0802377  -0.442
## T123:PRIMARYHHSUB      -0.1092376  0.0836217  -1.306
## T131:PRIMARYHHSUB      -0.0610588  0.1266517  -0.482
## T132:PRIMARYHHSUB      -0.0372753  0.0809144  -0.461
## T137:PRIMARYHHSUB      -0.0350315  0.0784096  -0.447
## T141:PRIMARYHHSUB      -0.0803380  0.0826573  -0.972
## T142:PRIMARYHHSUB       0.0334957  0.0804020   0.417
## T143:PRIMARYHHSUB      -0.1176882  0.0797352  -1.476
## T145:PRIMARYHHSUB      -0.1458065  0.0805663  -1.810
## T152:PRIMARYHHSUB      -0.1596282  0.0808367  -1.975
## T155:PRIMARYHHSUB       0.0341838  0.1246815   0.274
## 
## Correlation matrix not shown by default, as p = 132 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
qqmath(resid(m3))

plot(m3,resid(.,scaled=TRUE)~fitted(.),abline=0, xlab = "Fitted Values", ylab = "Residuals", main="Residuals of log model without outliers")

We see that indeed, the log transformation did not do much to help the residuals plot. We proceed without a log transform for sake of simplicity and for interpretability reasons.

Model Selection

primary is significant

# no PRIMARY
m1 = lmer(THOUSANDS ~ CYEAR + URBAN + T1 + (1 |HHID), data=df, REML=FALSE)

# PRIMARY
m2 = lmer(THOUSANDS ~ CYEAR + URBAN + T1 + PRIMARY +(1 |HHID), data=df, REML=FALSE)

anova(m1, m2)
## Data: df
## Models:
## m1: THOUSANDS ~ CYEAR + URBAN + T1 + (1 | HHID)
## m2: THOUSANDS ~ CYEAR + URBAN + T1 + PRIMARY + (1 | HHID)
##    npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## m1   16 368977 369116 -184472   368945                         
## m2   23 368841 369041 -184398   368795 149.28  7  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2 way interactions

variables_of_interest = c("CYEAR * URBAN", 
                          "CYEAR * T1", 
                          "CYEAR * PRIMARY", 
                          "URBAN * T1", 
                          "URBAN * PRIMARY", 
                          "T1 * PRIMARY")

remaining_variables_of_interest = variables_of_interest

current_variables = c("CYEAR",
                      "URBAN",
                      "T1",
                      "PRIMARY",
                      "(1|HHID)")
current_formula = as.formula(paste("THOUSANDS ~ ",paste(current_variables, collapse="+"),sep = ""))

while(TRUE){
  print("current baseline")
  print(current_variables)
  # get baseline
  current_formula = as.formula(paste("THOUSANDS ~ ",paste(current_variables, collapse="+"),sep = ""))
  baseline = lmer(current_formula, data=df, REML=FALSE)
  
  # loop through remaining variables of interest and create a new model
  models = c()
  for (var in remaining_variables_of_interest){
    curr_var_loop = c(current_variables, var)
    current_formula = as.formula(paste("THOUSANDS~ ",paste(curr_var_loop, collapse="+"),sep = ""))
    new_model = lmer(current_formula, data=df, REML=FALSE)
    models = c(models, new_model)
  }
  
  # perform anova on baseline and a new model, record the resulting p-value
  p_vals = c()
  for (m in models){
    anova_result = anova(baseline, m)
    p_vals= c(p_vals, anova_result$`Pr(>Chisq)`[2])
  }
  
  # if the smallest p-value is greater than 0.05, stop because no new variable is significant
  if (min(p_vals)>0.05){
    print("no remaining significant variables of interest")
    break
  }
  print(paste("selected", remaining_variables_of_interest[which.min(p_vals)], ", p-value of", min(p_vals) ))
  
  # add most signficant variable to model
  current_variables = c(current_variables, remaining_variables_of_interest[which.min(p_vals)])
  
  # remove the most significant variable from search space
  remaining_variables_of_interest = remaining_variables_of_interest[-which.min(p_vals)]
  if (length(remaining_variables_of_interest)==0){
    break
  }
}
## [1] "current baseline"
## [1] "CYEAR"    "URBAN"    "T1"       "PRIMARY"  "(1|HHID)"
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## [1] "selected CYEAR * T1 , p-value of 3.54917455359114e-104"
## [1] "current baseline"
## [1] "CYEAR"      "URBAN"      "T1"         "PRIMARY"    "(1|HHID)"  
## [6] "CYEAR * T1"
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## [1] "selected CYEAR * PRIMARY , p-value of 7.1108351681761e-64"
## [1] "current baseline"
## [1] "CYEAR"           "URBAN"           "T1"              "PRIMARY"        
## [5] "(1|HHID)"        "CYEAR * T1"      "CYEAR * PRIMARY"
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## [1] "selected T1 * PRIMARY , p-value of 8.23281763048671e-18"
## [1] "current baseline"
## [1] "CYEAR"           "URBAN"           "T1"              "PRIMARY"        
## [5] "(1|HHID)"        "CYEAR * T1"      "CYEAR * PRIMARY" "T1 * PRIMARY"
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## [1] "selected URBAN * T1 , p-value of 6.45576046737603e-17"
## [1] "current baseline"
## [1] "CYEAR"           "URBAN"           "T1"              "PRIMARY"        
## [5] "(1|HHID)"        "CYEAR * T1"      "CYEAR * PRIMARY" "T1 * PRIMARY"   
## [9] "URBAN * T1"
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## [1] "selected CYEAR * URBAN , p-value of 0.000373927288152989"
## [1] "current baseline"
##  [1] "CYEAR"           "URBAN"           "T1"              "PRIMARY"        
##  [5] "(1|HHID)"        "CYEAR * T1"      "CYEAR * PRIMARY" "T1 * PRIMARY"   
##  [9] "URBAN * T1"      "CYEAR * URBAN"
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## [1] "no remaining significant variables of interest"
two_way_model = as.formula(paste("THOUSANDS ~ ",paste(current_variables, collapse="+"),sep = ""))
two_way_model
## THOUSANDS ~ CYEAR + URBAN + T1 + PRIMARY + (1 | HHID) + CYEAR * 
##     T1 + CYEAR * PRIMARY + T1 * PRIMARY + URBAN * T1 + CYEAR * 
##     URBAN

3 way interaction

# CYEAR, URBAN, T1, PRIMARY
variables_of_interest = c("CYEAR * URBAN * T1", 
                          "CYEAR * URBAN * PRIMARY",
                          "CYEAR * T1 * PRIMARY", 
                          "URBAN * CYEAR * PRIMARY")

remaining_variables_of_interest = variables_of_interest

current_formula = two_way_model

while(TRUE){
  print("current baseline")
  print(current_variables)
  # get baseline
  current_formula = as.formula(paste("THOUSANDS ~ ",paste(current_variables, collapse="+"),sep = ""))
  baseline = lmer(current_formula, data=df, REML=FALSE)
  
  # loop through remaining variables of interest and create a new model
  models = c()
  for (var in remaining_variables_of_interest){
    curr_var_loop = c(current_variables, var)
    current_formula = as.formula(paste("THOUSANDS ~ ",paste(curr_var_loop, collapse="+"),sep = ""))
    new_model = lmer(current_formula, data=df, REML=FALSE)
    models = c(models, new_model)
  }
  
  p_vals = c()
  for (m in models){
    anova_result = anova(baseline, m)
    p_vals= c(p_vals, anova_result$`Pr(>Chisq)`[2])
  }
  
  if (min(p_vals)>0.05){
    print("no remaining significant variables of interest")
    break
  }
  print(paste("selected", remaining_variables_of_interest[which.min(p_vals)], ", p-value of", min(p_vals) ))
  
  # add most signficant variable to model
  current_variables = c(current_variables, remaining_variables_of_interest[which.min(p_vals)])
  
  # remove the most significant variable from search space
  remaining_variables_of_interest = remaining_variables_of_interest[-which.min(p_vals)]
  if (length(remaining_variables_of_interest)==0){
    break
  }
}
## [1] "current baseline"
##  [1] "CYEAR"           "URBAN"           "T1"              "PRIMARY"        
##  [5] "(1|HHID)"        "CYEAR * T1"      "CYEAR * PRIMARY" "T1 * PRIMARY"   
##  [9] "URBAN * T1"      "CYEAR * URBAN"
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## [1] "selected CYEAR * T1 * PRIMARY , p-value of 5.47163495513843e-20"
## [1] "current baseline"
##  [1] "CYEAR"                "URBAN"                "T1"                  
##  [4] "PRIMARY"              "(1|HHID)"             "CYEAR * T1"          
##  [7] "CYEAR * PRIMARY"      "T1 * PRIMARY"         "URBAN * T1"          
## [10] "CYEAR * URBAN"        "CYEAR * T1 * PRIMARY"
## fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
## [1] "selected CYEAR * URBAN * T1 , p-value of 1.31805736103392e-11"
## [1] "current baseline"
##  [1] "CYEAR"                "URBAN"                "T1"                  
##  [4] "PRIMARY"              "(1|HHID)"             "CYEAR * T1"          
##  [7] "CYEAR * PRIMARY"      "T1 * PRIMARY"         "URBAN * T1"          
## [10] "CYEAR * URBAN"        "CYEAR * T1 * PRIMARY" "CYEAR * URBAN * T1"
## fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
## [1] "no remaining significant variables of interest"
three_way_model = as.formula(paste("THOUSANDS ~ ",paste(current_variables, collapse="+"),sep = ""))
three_way_model
## THOUSANDS ~ CYEAR + URBAN + T1 + PRIMARY + (1 | HHID) + CYEAR * 
##     T1 + CYEAR * PRIMARY + T1 * PRIMARY + URBAN * T1 + CYEAR * 
##     URBAN + CYEAR * T1 * PRIMARY + CYEAR * URBAN * T1

four way model

m1 = lmer(THOUSANDS ~ URBAN +  CYEAR +  T1 + PRIMARY + CYEAR*URBAN + CYEAR*PRIMARY 
           + CYEAR*T1 + URBAN*T1 + CYEAR * T1 * PRIMARY + CYEAR * URBAN * T1 
          + (1|HHID), REML = FALSE, data = df)
## fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
m2 = lmer(THOUSANDS ~ URBAN +  CYEAR +  T1 + PRIMARY + CYEAR*URBAN + CYEAR*PRIMARY 
           + CYEAR*T1 + URBAN*T1 + CYEAR * T1 * PRIMARY + CYEAR * URBAN * T1 
          + CYEAR * URBAN * T1 * PRIMARY
          + (1|HHID), REML = FALSE, data = df)
## fixed-effect model matrix is rank deficient so dropping 52 columns / coefficients
anova(m1, m2)
## Data: df
## Models:
## m1: THOUSANDS ~ URBAN + CYEAR + T1 + PRIMARY + CYEAR * URBAN + CYEAR * 
## m1:     PRIMARY + CYEAR * T1 + URBAN * T1 + CYEAR * T1 * PRIMARY + 
## m1:     CYEAR * URBAN * T1 + (1 | HHID)
## m2: THOUSANDS ~ URBAN + CYEAR + T1 + PRIMARY + CYEAR * URBAN + CYEAR * 
## m2:     PRIMARY + CYEAR * T1 + URBAN * T1 + CYEAR * T1 * PRIMARY + 
## m2:     CYEAR * URBAN * T1 + CYEAR * URBAN * T1 * PRIMARY + (1 | 
## m2:     HHID)
##    npar    AIC    BIC  logLik deviance  Chisq  Df Pr(>Chisq)    
## m1  210 367724 369547 -183652   367304                          
## m2  334 367667 370568 -183499   366999 304.52 124  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
four_way_model = as.formula("THOUSANDS ~ URBAN +  CYEAR +  T1 + PRIMARY + CYEAR*URBAN + CYEAR*PRIMARY + CYEAR*T1 + URBAN*T1 + CYEAR * T1 * PRIMARY + CYEAR * URBAN * T1 + CYEAR * URBAN * T1 * PRIMARY + (1|HHID)")
four_way_model
## THOUSANDS ~ URBAN + CYEAR + T1 + PRIMARY + CYEAR * URBAN + CYEAR * 
##     PRIMARY + CYEAR * T1 + URBAN * T1 + CYEAR * T1 * PRIMARY + 
##     CYEAR * URBAN * T1 + CYEAR * URBAN * T1 * PRIMARY + (1 | 
##     HHID)

final model

final_model = lmer(THOUSANDS ~ URBAN +  CYEAR +  T1 + PRIMARY + CYEAR*URBAN + CYEAR*PRIMARY 
           + CYEAR*T1 + URBAN*T1 
          + (1|HHID), REML = FALSE, data = df)
summary(final_model)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: THOUSANDS ~ URBAN + CYEAR + T1 + PRIMARY + CYEAR * URBAN + CYEAR *  
##     PRIMARY + CYEAR * T1 + URBAN * T1 + (1 | HHID)
##    Data: df
## 
##       AIC       BIC    logLik  deviance  df.resid 
##  367974.6  368434.9 -183934.3  367868.6     43618 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -13.898  -0.286  -0.049   0.164  46.163 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  HHID     (Intercept) 141.5    11.90   
##  Residual             201.0    14.18   
## Number of obs: 43671, groups:  HHID, 9628
## 
## Fixed effects:
##                        Estimate Std. Error t value
## (Intercept)           -47.71427    6.11412  -7.804
## URBAN1                  9.62540    1.94467   4.950
## CYEAR                   2.74769    0.24469  11.229
## T121                   47.87037    6.14575   7.789
## T123                   39.40122    6.18778   6.368
## T131                   19.85373    8.60380   2.308
## T132                   48.29414    6.14752   7.856
## T137                   49.57922    6.14380   8.070
## T141                   48.05763    6.14415   7.822
## T142                   48.60362    6.14609   7.908
## T143                   49.28215    6.14622   8.018
## T145                   49.23402    6.14583   8.011
## T152                   47.12451    6.14404   7.670
## T155                   42.61613    8.51865   5.003
## PRIMARYHHFARM           1.77776    0.49518   3.590
## PRIMARYHHFISH           1.54722    2.21080   0.700
## PRIMARYHHGARD           0.82057    0.54850   1.496
## PRIMARYHHLVST           1.11212    0.88774   1.253
## PRIMARYHHOTHR          -0.89804    0.60974  -1.473
## PRIMARYHHRETIRE        -2.18280    0.71044  -3.072
## PRIMARYHHSUB            0.66017    0.54497   1.211
## URBAN1:CYEAR            0.11092    0.02641   4.200
## CYEAR:PRIMARYHHFARM    -0.35200    0.03218 -10.938
## CYEAR:PRIMARYHHFISH    -0.10889    0.16054  -0.678
## CYEAR:PRIMARYHHGARD    -0.13247    0.03219  -4.116
## CYEAR:PRIMARYHHLVST    -0.10558    0.06791  -1.555
## CYEAR:PRIMARYHHOTHR    -0.05971    0.03541  -1.686
## CYEAR:PRIMARYHHRETIRE   0.14120    0.03804   3.712
## CYEAR:PRIMARYHHSUB     -0.13978    0.03726  -3.752
## CYEAR:T121             -1.86217    0.24607  -7.568
## CYEAR:T123             -1.55856    0.24872  -6.266
## CYEAR:T131             -0.44391    0.34442  -1.289
## CYEAR:T132             -1.80510    0.24598  -7.338
## CYEAR:T137             -2.05459    0.24586  -8.357
## CYEAR:T141             -2.22682    0.24579  -9.060
## CYEAR:T142             -1.99293    0.24559  -8.115
## CYEAR:T143             -2.14820    0.24572  -8.742
## CYEAR:T145             -2.30401    0.24567  -9.378
## CYEAR:T152             -2.13290    0.24567  -8.682
## CYEAR:T155             -1.99448    0.34542  -5.774
## URBAN1:T121            -9.27064    2.10199  -4.410
## URBAN1:T123            -3.67630    2.13020  -1.726
## URBAN1:T131           -12.06342    2.62203  -4.601
## URBAN1:T132            -9.60730    2.09039  -4.596
## URBAN1:T137            -9.78458    2.09554  -4.669
## URBAN1:T141            -7.25441    2.09974  -3.455
## URBAN1:T142           -10.94066    2.14117  -5.110
## URBAN1:T143            -9.57577    2.11082  -4.537
## URBAN1:T145            -8.70572    2.09215  -4.161
## URBAN1:T152            -5.44761    2.10445  -2.589
## URBAN1:T155            -5.38565    2.31150  -2.330
## 
## Correlation matrix not shown by default, as p = 51 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

model diagnostics

qqmath(resid(final_model))

plot(final_model,resid(.,scaled=TRUE)~fitted(.),abline=0, main="Final Model Residuals")

vif(final_model)
##                       GVIF Df GVIF^(1/(2*Df))
## URBAN         4.290689e+01  1        6.550335
## CYEAR         6.981445e+02  1       26.422424
## T1            5.303258e+08 11        2.492127
## PRIMARY       5.295697e+03  7        1.844992
## URBAN:CYEAR   4.458281e+00  1        2.111464
## CYEAR:PRIMARY 1.026724e+04  7        1.934338
## CYEAR:T1      3.844677e+08 11        2.455958
## URBAN:T1      2.610188e+04 11        1.587661

cross validation

HHIDs = unique(df$HHID)

# do random sampling
n_times = 10

# 80 - 20 test split
test_size = 5 

our_MSEs = c()
two_way_MSEs = c()
three_way_MSEs = c()
four_way_MSEs = c()
set.seed(0)
for(i in 1:test_size){
  HHIDs = sample(HHIDs)
  # split into train and test folds
  test_indices = seq(i, length(HHIDs), test_size)
  train_fold_HHIDs = HHIDs[-test_indices]
  test_fold_HHIDs = HHIDs[test_indices]
  
  train_fold = df[df$HHID %in% train_fold_HHIDs,]
  test_fold = df[df$HHID %in% test_fold_HHIDs,]
  
  #create a new model on train_fold
  our_model = lmer(THOUSANDS ~ URBAN +  CYEAR +  T1 + PRIMARY + CYEAR*URBAN + CYEAR*PRIMARY 
           + CYEAR*T1 + URBAN*T1 
          + (1|HHID), REML = FALSE, data = train_fold)
  predictions = predict(our_model, test_fold, allow.new.levels=TRUE)
  residuals = (predictions) - (test_fold$THOUSANDS)
  MSE = mean(residuals^2)
  our_MSEs = c(our_MSEs, MSE)
  
  two_way = lmer(two_way_model, data=train_fold, REML=FALSE)
  predictions = predict(two_way, test_fold, allow.new.levels=TRUE)
  residuals = (predictions) - (test_fold$THOUSANDS)
  MSE = mean(residuals^2)
  two_way_MSEs = c(two_way_MSEs, MSE)
  
  three_way = lmer(three_way_model, data=train_fold, REML=FALSE)
  predictions = predict(three_way, test_fold, allow.new.levels=TRUE)
  residuals = (predictions) - (test_fold$THOUSANDS)
  MSE = mean(residuals^2)
  three_way_MSEs = c(three_way_MSEs, MSE)  
  
  four_way = lmer(four_way_model, data=train_fold, REML=FALSE)
  predictions = predict(four_way, test_fold, allow.new.levels=TRUE)
  residuals = (predictions) - (test_fold$THOUSANDS)
  MSE = mean(residuals^2)
  four_way_MSEs = c(four_way_MSEs, MSE)
}
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 10 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 55 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 10 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 57 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 10 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 55 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 9 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 58 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 10 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 55 columns / coefficients
mean(our_MSEs)
## [1] 240.0926
sd(our_MSEs)
## [1] 21.19593
mean(two_way_MSEs)
## [1] 239.9553
sd(two_way_MSEs)
## [1] 21.07868
mean(three_way_MSEs)
## [1] 240.6549
sd(three_way_MSEs)
## [1] 20.79697
mean(four_way_MSEs)
## [1] 244.3279
sd(four_way_MSEs)
## [1] 22.25931

Income Inequality

library(ineq)
library(tidyverse)
#gini index overtime
gini.years = df %>%
  dplyr::select(HHID,HHINCPC_CPI,WAVE) %>%
  arrange(HHINCPC_CPI) %>%
  transform(HHINCPC_CPI = ifelse(HHINCPC_CPI < 0, 0, HHINCPC_CPI)) %>%
  group_by(WAVE) %>%
  summarise(index = ineq(HHINCPC_CPI,type = c("Gini")))

p1 = ggplot(gini.years,aes(x = factor(WAVE),y = index)) +
  geom_line() +
  geom_point()+
  geom_text(aes(label = round(index, 2)),hjust = -0.2,vjust = 0.5) +
  ggtitle("Gini Index Over Year in China") +
  xlab("Year") + ylab("Gini Index") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))

years = c(1989,1991,1993,1997,2000,2004,2006,2009,2011,2015)
cols <- c("#ffe6e6","#ffcccc","#ff8080","#ff8080","#ff1a1a","#ff1a1a","#b30000","#660000","#330000")
plot(Lc(df[df$WAVE ==1989,]$HHINCPC_CPI),lwd=2,col = "#ffe6e6")
for (i in 2:length(years)){
  lines(Lc(df[df$WAVE==years[i],]$HHINCPC_CPI),col=cols[i],lwd=2)
}

#png("gini_year.png")
#print(p1)
#dev.off()
#gini index overtime and provinces
df =  df %>%
  mutate(city = case_when(T1 == 11 ~ "Beijing",
                          T1 == 21  ~ "LiaoNing",
                          T1 == 23  ~ "Heilongjiang",
                          T1 == 31  ~ "Shanghai",
                          T1 == 32  ~ "Jiangsu",
                          T1 == 33  ~ "Zhejiang",
                          T1 == 37  ~ "Shandong",
                          T1 == 41  ~ "Henan",
                          T1 == 42  ~ "Hubei",
                          T1 == 43  ~ "Hunan",
                          T1 == 45  ~ "Guangxi",
                          T1 == 52  ~ "Guizhou",
                          T1 == 53  ~ "Yunnan",
                          T1 == 55  ~ "Chongqing",
                          T1 == 61  ~ "Shanxi"))
gini.years$city = c("China","China","China","China","China","China","China","China","China","China")
gini.years.province = df %>%
  dplyr::select(HHID,HHINCPC_CPI,T1,WAVE,city) %>%
  #arrange(HHINCPC_CPI) %>%
  transform(HHINCPC_CPI = ifelse(HHINCPC_CPI < 0, 0, HHINCPC_CPI)) %>%
  group_by(city,WAVE) %>%
  summarise(index = ineq(HHINCPC_CPI,type = c("Gini")))

p2 = ggplot(gini.years.province,aes(x = factor(WAVE),y = index,group = city,color = city)) +
  geom_line() +
  geom_point()+
  geom_line(data = gini.years,aes(x = factor(WAVE),y = index),color = "black") + 
  ggtitle("Gini Index Over Year by Province") +
  xlab("Year") + ylab("Gini Index") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))

#png("gini_year_province.png")
#print(p2)
#dev.off()
df =  df %>%
  mutate(primary_income = case_when(PRIMARY == "HHBUS"~ HHBUS,
                          PRIMARY == "HHFARM"~ HHFARM,
                          PRIMARY == "HHFISH"~ HHFISH,
                          PRIMARY == "HHGARD"~ HHGARD,
                          PRIMARY == "HHLVST"~ HHLVST,
                          PRIMARY == "HHNRWAGE"~ HHNRWAGE,
                          PRIMARY == "HHOTHR"~ HHOTHR,
                          PRIMARY == "HHRETIRE"~ HHRETIRE,
                          PRIMARY == "HHSUB"~ HHSUB))
gini.years.primary = df %>%
  dplyr::select(HHID,primary_income,T1,WAVE,city,PRIMARY) %>%
  transform(primary_income = ifelse(primary_income < 0, 0, primary_income)) %>%
  group_by(PRIMARY,WAVE) %>%
  summarise(index = ineq(primary_income,type = c("Gini")))

ggplot(gini.years.primary,aes(x = WAVE,y = index,group = PRIMARY,color = PRIMARY)) +
  geom_line()+
  #geom_point()+
  ggtitle("Gini Index Over Year by Primary Outcome") +
  xlab("Year") + ylab("Gini Index") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))

ggplot(gini.years.primary,aes(x = WAVE,y = index)) +
  geom_line() +
  geom_point()+
  #geom_text(aes(label = round(index, 3),hjust = -0.3,vjust = 0.5)) +
  ggtitle("Gini Index Over Year") +
  xlab("Year") + ylab("Gini Index") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))+
  facet_wrap(~PRIMARY)

gini.years.urban = df %>%
  dplyr::select(HHID,primary_income,T1,WAVE,URBAN) %>%
  transform(primary_income = ifelse(primary_income < 0, 0, primary_income)) %>%
  group_by(URBAN,WAVE) %>%
  summarise(index = ineq(primary_income,type = c("Gini")))

ggplot(gini.years.urban,aes(x = WAVE,y = index,group = URBAN,color = URBAN)) +
  geom_line()+
  #geom_point()+
  ggtitle("Gini Index Over Year by Urban/Rural") +
  xlab("Year") + ylab("Gini Index") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))

ratio.years = df %>%
  dplyr::select(HHID,HHINCPC_CPI,WAVE,city) %>%
  transform(HHINCPC_CPI = ifelse(HHINCPC_CPI < 0, 0, HHINCPC_CPI)) %>%
  group_by(WAVE) %>%
  mutate(top10 = ifelse(HHINCPC_CPI >= quantile(HHINCPC_CPI,0.9),HHINCPC_CPI,0),
        bottom10 = ifelse(HHINCPC_CPI <= quantile(HHINCPC_CPI,0.1),HHINCPC_CPI,0)) %>%
  summarise(ratio = sum(bottom10)/sum(top10),sumb = sum(bottom10),sumt =sum(top10))

ggplot(ratio.years,aes(x = WAVE,y = ratio)) +
  geom_line() +
  geom_point()+
  geom_text(aes(label = round(ratio, 3),hjust = -0.3,vjust = 0.5)) +
  ggtitle("Gini Index Over Year") +
  xlab("Year") + ylab("Gini Index") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))

ratio.years.province = df %>%
  dplyr::select(HHID,HHINCPC_CPI,WAVE,city) %>%
  transform(HHINCPC_CPI = ifelse(HHINCPC_CPI < 0, 0, HHINCPC_CPI)) %>%
  group_by(WAVE,city) %>%
  mutate(top10 = ifelse(HHINCPC_CPI >= quantile(HHINCPC_CPI,0.9),HHINCPC_CPI,0),
        bottom10 = ifelse(HHINCPC_CPI <= quantile(HHINCPC_CPI,0.1),HHINCPC_CPI,0)) %>%
  summarise(ratio = sum(bottom10)/sum(top10),sumb = sum(bottom10),sumt =sum(top10))

ggplot(ratio.years.province,aes(x = WAVE,y = ratio,group = city,color = city)) +
  geom_line()+
  geom_point()+
  ggtitle("Gini Index Over Year by Province") +
  xlab("Year") + ylab("Gini Index") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))

ggplot(ratio.years.province,aes(x = WAVE,y = ratio)) +
  geom_line() +
  geom_point()+
  #geom_text(aes(label = round(index, 3),hjust = -0.3,vjust = 0.5)) +
  ggtitle("Gini Index Over Year") +
  xlab("Year") + ylab("Gini Index") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))+
  facet_wrap(~city)

ratio.years.primary = df %>%
  dplyr::select(HHID,HHINCPC_CPI,WAVE,PRIMARY) %>%
  transform(HHINCPC_CPI = ifelse(HHINCPC_CPI < 0, 0, HHINCPC_CPI)) %>%
  group_by(WAVE,PRIMARY) %>%
  mutate(top10 = ifelse(HHINCPC_CPI >= quantile(HHINCPC_CPI,0.9),HHINCPC_CPI,0),
        bottom10 = ifelse(HHINCPC_CPI <= quantile(HHINCPC_CPI,0.1),HHINCPC_CPI,0)) %>%
  summarise(ratio = sum(bottom10)/sum(top10),sumb = sum(bottom10),sumt =sum(top10))

ggplot(ratio.years.primary,aes(x = WAVE,y = ratio,group = PRIMARY,color = PRIMARY)) +
  geom_line()+
  geom_point()+
  ggtitle("Gini Index Over Year by Province") +
  xlab("Year") + ylab("Gini Index") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))

ggplot(ratio.years.primary,aes(x = WAVE,y = ratio)) +
  geom_line() +
  geom_point()+
  #geom_text(aes(label = round(index, 3),hjust = -0.3,vjust = 0.5)) +
  ggtitle("Gini Index Over Year") +
  xlab("Year") + ylab("Gini Index") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))+
  facet_wrap(~PRIMARY)

Beta coefficient test

library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
coefs <- names(fixef(final_model))
#year
coeftest_year <- linearHypothesis(final_model, "CYEAR=0")
#urban/rural
coeftest_urban <- linearHypothesis(final_model, "URBAN1=0")
#province
coeftest_province <- linearHypothesis(final_model, coefs[4:14])
#primary
coeftest_primary <- linearHypothesis(final_model, coefs[15:21])
#year:primary
coeftest_yearprimary <- linearHypothesis(final_model, coefs[22:28])
#year:urban
coeftest_yearurban <- linearHypothesis(final_model, coefs[29])
#year:province
coeftest_yearprovince <- linearHypothesis(final_model, coefs[30:40])
#urban:province
coeftest_urbanprovince <- linearHypothesis(final_model, coefs[41:51])
 

coeftest <- na.omit(rbind(coeftest_year, coeftest_urban, coeftest_province, coeftest_primary, coeftest_yearprimary, coeftest_yearurban, coeftest_yearprovince, coeftest_urbanprovince))
var <- c("Year","Urban", "Province", "Primary Income Source", "Year:Primary Source", "Year:Urban", "Year:Province", "Urban:Province")
coeftest <- cbind(var, coeftest)
kable(coeftest, col.names = c("Variable", "Degrees of Freedom", "Chi-squared", "p-value"), caption="Hypothesis Testing of Urban Coefficients",align="c")
Hypothesis Testing of Urban Coefficients
Variable Degrees of Freedom Chi-squared p-value
2 Year 1 126.09296 0.0000000
21 Urban 1 24.49895 0.0000007
22 Province 11 166.26294 0.0000000
23 Primary Income Source 7 47.88564 0.0000000
24 Year:Primary Source 7 311.83626 0.0000000
25 Year:Urban 1 14.07428 0.0001757
26 Year:Province 11 486.58764 0.0000000
27 Urban:Province 11 69.94011 0.0000000
png("beta-test.png")
p <- tableGrob(coeftest)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2

#Variation in household income by province

yrs <- c(0,2,4,8,11,15,17,20,22,26)
xvar <- c("1989", "1991", "1993", "1997", "2000", "2004", "2006", "2009", "2011", "2015")
estimate <- as.data.frame(summary(final_model)$coefficients)
estimate <- estimate %>% dplyr::select(Estimate)

for (i in yrs) {
  prov21 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T121",]+yrs*estimate["CYEAR:T121",]
  df7 <- data.frame(prov21)
}
for (i in yrs) {
  prov23 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T123",]+yrs*estimate["CYEAR:T123",]
  df8 <- data.frame(prov23)
}
for (i in yrs) {
  prov31 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T131",]+yrs*estimate["CYEAR:T131",]
  df9 <- data.frame(prov31)
}
for (i in yrs) {
  prov32 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T132",]+yrs*estimate["CYEAR:T132",]
  df10 <- data.frame(prov32)
}
for (i in yrs) {
  prov37 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T137",]+yrs*estimate["CYEAR:T137",]
  df11 <- data.frame(prov37)
}
for (i in yrs) {
  prov41 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T141",]+yrs*estimate["CYEAR:T141",]
  df12 <- data.frame(prov41)
}
for (i in yrs) {
  prov42 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T142",]+yrs*estimate["CYEAR:T142",]
  df13 <- data.frame(prov42)
}
for (i in yrs) {
  prov43 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T143",]+yrs*estimate["CYEAR:T143",]
  df14 <- data.frame(prov43)
}
for (i in yrs) {
  prov45 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T145",]+yrs*estimate["CYEAR:T145",]
  df15 <- data.frame(prov45)
}
for (i in yrs) {
  prov52 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T152",]+yrs*estimate["CYEAR:T152",]
  df16 <- data.frame(prov52)
}
for (i in yrs) {
  prov55 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T155",]+yrs*estimate["CYEAR:T155",]
  df17 <- data.frame(prov55)
}
for (i in yrs) {
  prov11 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]
  df18 <- data.frame(prov11)
}

prov.data <- cbind(xvar, df18, df7, df8, df9, df10, df11, df12, df13, df14, df15, df16, df17)
prov.data$xvar <- as.numeric(as.character(prov.data$xvar))
mdf2 <- melt(prov.data, id.vars="xvar")

plot2 <- ggplot(mdf2, aes(x=xvar, y=value, group=variable, color=variable)) + 
  geom_line() +
  geom_point() +
  scale_color_discrete(name="Province",
                       labels=c("Beijing", "Liaoning", "Heilongjiang", "Shanghai", "Jiangsu", "Shandong", "Henan", "Hubei", "Hunan", "Guangxi", "Guizhou", "Chongqing")) +
  labs(title="Changes in Household Income per Capita Over Time \nby Province",
       x="Year",
       y="Beta coefficients") +
    theme(plot.title = element_text(size=20))

#Variation in household income by urban/rural status

for (i in yrs) {
  urb <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["URBAN1",]+estimate["CYEAR:URBAN1",]
  df19 <- data.frame(urb)
}
for (i in yrs) {
  rur <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]
  df20 <- data.frame(rur)
}

urb.data <- cbind(xvar, df19, df20)
urb.data$xvar <- as.numeric(as.character(urb.data$xvar))
mdf3 <- melt(urb.data, id.vars="xvar")

plot3 <- ggplot(mdf3, aes(x=xvar, y=value, group=variable, color=variable)) +
  geom_line() +
  geom_point() +
  scale_color_discrete(name="Status",
                       labels=c("Urban", "Rural")) +
  labs(title="Changes in Household Income per Capita Over Time \nby Urban/Rural Status",
       x="Year",
       y="Beta coefficients") +
  theme(plot.title = element_text(size=20))

#Variation in household income by income source

for (i in yrs) {
  bus <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]
  d <- data.frame(bus)
}

for (i in yrs) {
  farm <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["PRIMARYHHFARM",]+yrs*estimate["CYEAR:PRIMARYHHFARM",]
  df0 <- data.frame(farm)
}

for (i in yrs) {
  fish <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["PRIMARYHHFISH",]+yrs*estimate["CYEAR:PRIMARYHHFISH",]
  df1 <- data.frame(fish)
}

for (i in yrs) {
  gard <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["PRIMARYHHGARD",]+yrs*estimate["CYEAR:PRIMARYHHGARD",]
  df2 <- data.frame(gard)
}

for (i in yrs) {
  lvst <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["PRIMARYHHLVST",]+yrs*estimate["CYEAR:PRIMARYHHLVST",]
  df3 <- data.frame(lvst)
}

for (i in yrs) {
  othr <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["PRIMARYHHOTHR",]+yrs*estimate["CYEAR:PRIMARYHHOTHR",]
  df4 <- data.frame(othr)
}

for (i in yrs) {
  retire <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["PRIMARYHHRETIRE",]+yrs*estimate["CYEAR:PRIMARYHHRETIRE",]
  df5 <- data.frame(retire)
}

for (i in yrs) {
  sub <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["PRIMARYHHSUB",]+yrs*estimate["CYEAR:PRIMARYHHSUB",]
  df6 <- data.frame(sub)
}

source.data <- cbind(xvar, d, df0, df1, df2, df3, df4, df5, df6)
source.data$xvar <- as.numeric(as.character(source.data$xvar))
mdf1 <- melt(source.data, id.vars="xvar")

plot1 <- ggplot(mdf1, aes(x=xvar, y=value, group=variable, color=variable)) + 
  geom_line(aes(color=variable)) +
  geom_point() +
  scale_color_discrete(name="Income Source",
                       labels=c("Business","Farming", "Fishing", "Gardening", "Livestock", "Others", "Retirement", "Subsidies")) +
  labs(title="Changes in Household Income per Capita Over Time \nby Primary Income Source",
       x="Year",
       y="Beta coefficients") +
    theme(plot.title = element_text(size=20))